R Markdown
library(readr)
library(Hotelling)
library(car)
library(stats)
library(biotools)
# Load the dataset
BreastCancer_data <- read_csv("~/Downloads/wdbc.data")
names(BreastCancer_data) <- c('ID', 'Diagnosis', 'mean_radius', 'mean_texture', 'mean_perimeter', 'mean_area', 'mean_smoothness', 'mean_compactness', 'mean_concavity', 'mean_concave_points', 'mean_symmetry', 'mean_fractal_dimension', 'se_radius', 'se_texture', 'se_perimeter', 'se_area', 'se_smoothness', 'se_compactness', 'se_concavity', 'se_concave_points', 'se_symmetry', 'se_fractal_dimension', 'worst_radius', 'worst_texture', 'worst_perimeter', 'worst_area', 'worst_smoothness', 'worst_compactness', 'worst_concavity', 'worst_concave_points', 'worst_symmetry', 'worst_fractal_dimension')
BreastCancer_data
## # A tibble: 568 × 32
## ID Diagnosis mean_rad…¹ mean_…² mean_…³ mean_…⁴ mean_…⁵ mean_…⁶ mean_…⁷
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 842517 M 20.6 17.8 133. 1326 0.0847 0.0786 0.0869
## 2 84300903 M 19.7 21.2 130 1203 0.110 0.160 0.197
## 3 84348301 M 11.4 20.4 77.6 386. 0.142 0.284 0.241
## 4 84358402 M 20.3 14.3 135. 1297 0.100 0.133 0.198
## 5 843786 M 12.4 15.7 82.6 477. 0.128 0.17 0.158
## 6 844359 M 18.2 20.0 120. 1040 0.0946 0.109 0.113
## 7 84458202 M 13.7 20.8 90.2 578. 0.119 0.164 0.0937
## 8 844981 M 13 21.8 87.5 520. 0.127 0.193 0.186
## 9 84501001 M 12.5 24.0 84.0 476. 0.119 0.240 0.227
## 10 845636 M 16.0 23.2 103. 798. 0.0821 0.0667 0.0330
## # … with 558 more rows, 23 more variables: mean_concave_points <dbl>,
## # mean_symmetry <dbl>, mean_fractal_dimension <dbl>, se_radius <dbl>,
## # se_texture <dbl>, se_perimeter <dbl>, se_area <dbl>, se_smoothness <dbl>,
## # se_compactness <dbl>, se_concavity <dbl>, se_concave_points <dbl>,
## # se_symmetry <dbl>, se_fractal_dimension <dbl>, worst_radius <dbl>,
## # worst_texture <dbl>, worst_perimeter <dbl>, worst_area <dbl>,
## # worst_smoothness <dbl>, worst_compactness <dbl>, worst_concavity <dbl>, …
BC_data <- BreastCancer_data
BC_data
## # A tibble: 568 × 32
## ID Diagnosis mean_rad…¹ mean_…² mean_…³ mean_…⁴ mean_…⁵ mean_…⁶ mean_…⁷
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 842517 M 20.6 17.8 133. 1326 0.0847 0.0786 0.0869
## 2 84300903 M 19.7 21.2 130 1203 0.110 0.160 0.197
## 3 84348301 M 11.4 20.4 77.6 386. 0.142 0.284 0.241
## 4 84358402 M 20.3 14.3 135. 1297 0.100 0.133 0.198
## 5 843786 M 12.4 15.7 82.6 477. 0.128 0.17 0.158
## 6 844359 M 18.2 20.0 120. 1040 0.0946 0.109 0.113
## 7 84458202 M 13.7 20.8 90.2 578. 0.119 0.164 0.0937
## 8 844981 M 13 21.8 87.5 520. 0.127 0.193 0.186
## 9 84501001 M 12.5 24.0 84.0 476. 0.119 0.240 0.227
## 10 845636 M 16.0 23.2 103. 798. 0.0821 0.0667 0.0330
## # … with 558 more rows, 23 more variables: mean_concave_points <dbl>,
## # mean_symmetry <dbl>, mean_fractal_dimension <dbl>, se_radius <dbl>,
## # se_texture <dbl>, se_perimeter <dbl>, se_area <dbl>, se_smoothness <dbl>,
## # se_compactness <dbl>, se_concavity <dbl>, se_concave_points <dbl>,
## # se_symmetry <dbl>, se_fractal_dimension <dbl>, worst_radius <dbl>,
## # worst_texture <dbl>, worst_perimeter <dbl>, worst_area <dbl>,
## # worst_smoothness <dbl>, worst_compactness <dbl>, worst_concavity <dbl>, …
attach(BC_data)
str(BC_data)
## spc_tbl_ [568 × 32] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ ID : num [1:568] 842517 84300903 84348301 84358402 843786 ...
## $ Diagnosis : chr [1:568] "M" "M" "M" "M" ...
## $ mean_radius : num [1:568] 20.6 19.7 11.4 20.3 12.4 ...
## $ mean_texture : num [1:568] 17.8 21.2 20.4 14.3 15.7 ...
## $ mean_perimeter : num [1:568] 132.9 130 77.6 135.1 82.6 ...
## $ mean_area : num [1:568] 1326 1203 386 1297 477 ...
## $ mean_smoothness : num [1:568] 0.0847 0.1096 0.1425 0.1003 0.1278 ...
## $ mean_compactness : num [1:568] 0.0786 0.1599 0.2839 0.1328 0.17 ...
## $ mean_concavity : num [1:568] 0.0869 0.1974 0.2414 0.198 0.1578 ...
## $ mean_concave_points : num [1:568] 0.0702 0.1279 0.1052 0.1043 0.0809 ...
## $ mean_symmetry : num [1:568] 0.181 0.207 0.26 0.181 0.209 ...
## $ mean_fractal_dimension : num [1:568] 0.0567 0.06 0.0974 0.0588 0.0761 ...
## $ se_radius : num [1:568] 0.543 0.746 0.496 0.757 0.335 ...
## $ se_texture : num [1:568] 0.734 0.787 1.156 0.781 0.89 ...
## $ se_perimeter : num [1:568] 3.4 4.58 3.44 5.44 2.22 ...
## $ se_area : num [1:568] 74.1 94 27.2 94.4 27.2 ...
## $ se_smoothness : num [1:568] 0.00522 0.00615 0.00911 0.01149 0.00751 ...
## $ se_compactness : num [1:568] 0.0131 0.0401 0.0746 0.0246 0.0335 ...
## $ se_concavity : num [1:568] 0.0186 0.0383 0.0566 0.0569 0.0367 ...
## $ se_concave_points : num [1:568] 0.0134 0.0206 0.0187 0.0188 0.0114 ...
## $ se_symmetry : num [1:568] 0.0139 0.0225 0.0596 0.0176 0.0216 ...
## $ se_fractal_dimension : num [1:568] 0.00353 0.00457 0.00921 0.00511 0.00508 ...
## $ worst_radius : num [1:568] 25 23.6 14.9 22.5 15.5 ...
## $ worst_texture : num [1:568] 23.4 25.5 26.5 16.7 23.8 ...
## $ worst_perimeter : num [1:568] 158.8 152.5 98.9 152.2 103.4 ...
## $ worst_area : num [1:568] 1956 1709 568 1575 742 ...
## $ worst_smoothness : num [1:568] 0.124 0.144 0.21 0.137 0.179 ...
## $ worst_compactness : num [1:568] 0.187 0.424 0.866 0.205 0.525 ...
## $ worst_concavity : num [1:568] 0.242 0.45 0.687 0.4 0.535 ...
## $ worst_concave_points : num [1:568] 0.186 0.243 0.258 0.163 0.174 ...
## $ worst_symmetry : num [1:568] 0.275 0.361 0.664 0.236 0.399 ...
## $ worst_fractal_dimension: num [1:568] 0.089 0.0876 0.173 0.0768 0.1244 ...
## - attr(*, "spec")=
## .. cols(
## .. `842302` = col_double(),
## .. M = col_character(),
## .. `17.99` = col_double(),
## .. `10.38` = col_double(),
## .. `122.8` = col_double(),
## .. `1001` = col_double(),
## .. `0.1184` = col_double(),
## .. `0.2776` = col_double(),
## .. `0.3001` = col_double(),
## .. `0.1471` = col_double(),
## .. `0.2419` = col_double(),
## .. `0.07871` = col_double(),
## .. `1.095` = col_double(),
## .. `0.9053` = col_double(),
## .. `8.589` = col_double(),
## .. `153.4` = col_double(),
## .. `0.006399` = col_double(),
## .. `0.04904` = col_double(),
## .. `0.05373` = col_double(),
## .. `0.01587` = col_double(),
## .. `0.03003` = col_double(),
## .. `0.006193` = col_double(),
## .. `25.38` = col_double(),
## .. `17.33` = col_double(),
## .. `184.6` = col_double(),
## .. `2019` = col_double(),
## .. `0.1622` = col_double(),
## .. `0.6656` = col_double(),
## .. `0.7119` = col_double(),
## .. `0.2654` = col_double(),
## .. `0.4601` = col_double(),
## .. `0.1189` = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
BC_data$Diagnosis <- as.factor(BC_data$Diagnosis)
BC_data_x <- BC_data[, 3:32]
BC_data_x
## # A tibble: 568 × 30
## mean_radius mean_te…¹ mean_…² mean_…³ mean_…⁴ mean_…⁵ mean_…⁶ mean_…⁷ mean_…⁸
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 20.6 17.8 133. 1326 0.0847 0.0786 0.0869 0.0702 0.181
## 2 19.7 21.2 130 1203 0.110 0.160 0.197 0.128 0.207
## 3 11.4 20.4 77.6 386. 0.142 0.284 0.241 0.105 0.260
## 4 20.3 14.3 135. 1297 0.100 0.133 0.198 0.104 0.181
## 5 12.4 15.7 82.6 477. 0.128 0.17 0.158 0.0809 0.209
## 6 18.2 20.0 120. 1040 0.0946 0.109 0.113 0.074 0.179
## 7 13.7 20.8 90.2 578. 0.119 0.164 0.0937 0.0598 0.220
## 8 13 21.8 87.5 520. 0.127 0.193 0.186 0.0935 0.235
## 9 12.5 24.0 84.0 476. 0.119 0.240 0.227 0.0854 0.203
## 10 16.0 23.2 103. 798. 0.0821 0.0667 0.0330 0.0332 0.153
## # … with 558 more rows, 21 more variables: mean_fractal_dimension <dbl>,
## # se_radius <dbl>, se_texture <dbl>, se_perimeter <dbl>, se_area <dbl>,
## # se_smoothness <dbl>, se_compactness <dbl>, se_concavity <dbl>,
## # se_concave_points <dbl>, se_symmetry <dbl>, se_fractal_dimension <dbl>,
## # worst_radius <dbl>, worst_texture <dbl>, worst_perimeter <dbl>,
## # worst_area <dbl>, worst_smoothness <dbl>, worst_compactness <dbl>,
## # worst_concavity <dbl>, worst_concave_points <dbl>, worst_symmetry <dbl>, …
BC_data_cm <- colMeans(BC_data_x)
BC_data_S <- cov(BC_data_x)
BC_data_d <- apply(BC_data_x, MARGIN = 1, function(BC_data_x)t(BC_data_x - BC_data_cm) %*% solve(BC_data_S) %*% (BC_data_x - BC_data_cm))
BC_data_cm
## mean_radius mean_texture mean_perimeter
## 1.412049e+01 1.930533e+01 9.191475e+01
## mean_area mean_smoothness mean_compactness
## 6.542798e+02 9.632148e-02 1.040360e-01
## mean_concavity mean_concave_points mean_symmetry
## 8.842731e-02 4.874629e-02 1.810549e-01
## mean_fractal_dimension se_radius se_texture
## 6.276960e-02 4.039576e-01 1.217402e+00
## se_perimeter se_area se_smoothness
## 2.855984e+00 4.013802e+01 7.042109e-03
## se_compactness se_concavity se_concave_points
## 2.543666e-02 3.185527e-02 1.178896e-02
## se_symmetry se_fractal_dimension worst_radius
## 2.052560e-02 3.790682e-03 1.625315e+01
## worst_texture worst_perimeter worst_area
## 2.569192e+01 1.071251e+02 8.785789e+02
## worst_smoothness worst_compactness worst_concavity
## 1.323161e-01 2.535409e-01 2.714143e-01
## worst_concave_points worst_symmetry worst_fractal_dimension
## 1.143407e-01 2.897762e-01 8.388428e-02
BC_data_S
## mean_radius mean_texture mean_perimeter
## mean_radius 1.241446e+01 4.977041e+00 8.538744e+01
## mean_texture 4.977041e+00 1.839128e+01 3.498582e+01
## mean_perimeter 8.538744e+01 3.498582e+01 5.898024e+02
## mean_area 1.224281e+03 4.922992e+02 8.431797e+03
## mean_smoothness 8.318960e-03 -1.070339e-03 6.976046e-02
## mean_compactness 9.318078e-02 5.658896e-02 7.065370e-01
## mean_concavity 1.890209e-01 1.072014e-01 1.378170e+00
## mean_concave_points 1.120034e-01 5.060880e-02 7.984274e-01
## mean_symmetry 1.388384e-02 9.389823e-03 1.188281e-01
## mean_fractal_dimension -7.875976e-03 -2.074770e-03 -4.580476e-02
## se_radius 6.601130e-01 3.404765e-01 4.632046e+00
## se_texture -1.873961e-01 9.134076e-01 -1.148069e+00
## se_perimeter 4.772965e+00 2.543855e+00 3.380135e+01
## se_area 1.174046e+02 5.271029e+01 8.187864e+02
## se_smoothness -2.355109e-03 7.545494e-05 -1.477929e-02
## se_compactness 1.286264e-02 1.518357e-02 1.080202e-01
## se_concavity 2.054624e-02 1.898047e-02 1.664018e-01
## se_concave_points 8.166187e-03 4.420177e-03 6.094047e-02
## se_symmetry -3.109091e-03 4.744283e-04 -1.694215e-02
## se_fractal_dimension -4.146920e-04 6.586149e-04 -4.863904e-04
## worst_radius 1.648070e+01 7.485610e+00 1.135626e+02
## worst_texture 6.501412e+00 2.402127e+01 4.579262e+01
## worst_perimeter 1.139623e+02 5.305461e+01 7.895129e+02
## worst_area 1.883788e+03 8.606878e+02 1.298711e+04
## worst_smoothness 9.438014e-03 8.094079e-03 8.204788e-02
## worst_compactness 2.268464e-01 1.948166e-01 1.723145e+00
## worst_concavity 3.850688e-01 2.775081e-01 2.839595e+00
## worst_concave_points 1.716677e-01 8.601180e-02 1.225807e+00
## worst_symmetry 3.464846e-02 3.066769e-02 2.755397e-01
## worst_fractal_dimension 2.119824e-04 9.826686e-03 2.052601e-02
## mean_area mean_smoothness mean_compactness
## mean_radius 1.224281e+03 8.318960e-03 9.318078e-02
## mean_texture 4.922992e+02 -1.070339e-03 5.658896e-02
## mean_perimeter 8.431797e+03 6.976046e-02 7.065370e-01
## mean_area 1.238503e+05 8.642461e-01 9.175323e+00
## mean_smoothness 8.642461e-01 1.972903e-04 4.836908e-04
## mean_compactness 9.175323e+00 4.836908e-04 2.741070e-03
## mean_concavity 1.914966e+01 5.780471e-04 3.660011e-03
## mean_concave_points 1.120175e+01 2.988769e-04 1.676183e-03
## mean_symmetry 1.425029e+00 2.130687e-04 8.554644e-04
## mean_fractal_dimension -7.143974e-01 5.755138e-05 2.063140e-04
## se_radius 7.119520e+01 1.150983e-03 7.087509e-03
## se_texture -1.269935e+01 5.437959e-04 1.443878e-03
## se_perimeter 5.144223e+02 8.211561e-03 5.696347e-02
## se_area 1.276197e+04 1.536177e-01 1.062030e+00
## se_smoothness -1.761392e-01 1.408523e-05 2.168893e-05
## se_compactness 1.327680e+00 7.955419e-05 6.926883e-04
## se_concavity 2.196490e+00 1.047898e-04 9.044430e-04
## se_concave_points 8.073949e-01 3.293454e-05 2.084158e-04
## se_symmetry -2.170701e-01 2.301361e-05 9.767365e-05
## se_fractal_dimension -2.001763e-02 1.047957e-05 7.028679e-05
## worst_radius 1.634838e+03 1.415769e-02 1.340950e-01
## worst_texture 6.280260e+02 3.448643e-03 8.324134e-02
## worst_perimeter 1.131450e+04 1.100670e-01 1.025586e+00
## worst_area 1.918354e+05 1.613890e+00 1.500198e+01
## worst_smoothness 9.760221e-01 2.578987e-04 6.740252e-04
## worst_compactness 2.140320e+01 1.031305e-03 7.081106e-03
## worst_concavity 3.743191e+01 1.261252e-03 8.875002e-03
## worst_concave_points 1.663904e+01 4.600052e-04 2.790102e-03
## worst_symmetry 3.027352e+00 3.370779e-04 1.617994e-03
## worst_fractal_dimension 2.423656e-03 1.256969e-04 6.461268e-04
## mean_concavity mean_concave_points mean_symmetry
## mean_radius 1.890209e-01 1.120034e-01 1.388384e-02
## mean_texture 1.072014e-01 5.060880e-02 9.389823e-03
## mean_perimeter 1.378170e+00 7.984274e-01 1.188281e-01
## mean_area 1.914966e+01 1.120175e+01 1.425029e+00
## mean_smoothness 5.780471e-04 2.988769e-04 2.130687e-04
## mean_compactness 3.660011e-03 1.676183e-03 8.554644e-04
## mean_concavity 6.287574e-03 2.818564e-03 1.073442e-03
## mean_concave_points 2.818564e-03 1.491285e-03 4.823144e-04
## mean_symmetry 1.073442e-03 4.823144e-04 7.463505e-04
## mean_fractal_dimension 1.839527e-04 4.304948e-05 9.134733e-05
## se_radius 1.373727e-02 7.404969e-03 2.236431e-03
## se_texture 3.474098e-03 5.146358e-04 1.973398e-03
## se_perimeter 1.044943e-01 5.485873e-02 1.681490e-02
## se_area 2.200860e+00 1.201038e+00 2.676738e-01
## se_smoothness 2.387351e-05 3.338825e-06 1.551484e-05
## se_compactness 9.498062e-04 3.373040e-04 2.048459e-04
## se_concavity 1.658273e-03 5.115180e-04 2.816906e-04
## se_concave_points 3.351633e-04 1.469513e-04 6.620805e-05
## se_symmetry 1.139719e-04 2.899277e-05 1.009432e-04
## se_fractal_dimension 9.404932e-05 2.607809e-05 2.385292e-05
## worst_radius 2.622474e-01 1.544149e-01 2.367461e-02
## worst_texture 1.503095e-01 7.139010e-02 1.619689e-02
## worst_perimeter 1.928925e+00 1.104569e+00 1.939526e-01
## worst_area 3.031152e+01 1.772095e+01 2.648439e+00
## worst_smoothness 8.072486e-04 3.966553e-04 2.643405e-04
## worst_compactness 9.332580e-03 4.010709e-03 2.000497e-03
## worst_concavity 1.456572e-02 6.025305e-03 2.437763e-03
## worst_concave_points 4.465145e-03 2.299381e-03 7.605824e-04
## worst_symmetry 1.959591e-03 8.741219e-04 1.170789e-03
## worst_fractal_dimension 7.296756e-04 2.527607e-04 2.137065e-04
## mean_fractal_dimension se_radius se_texture
## mean_radius -7.875976e-03 6.601130e-01 -1.873961e-01
## mean_texture -2.074770e-03 3.404765e-01 9.134076e-01
## mean_perimeter -4.580476e-02 4.632046e+00 -1.148069e+00
## mean_area -7.143974e-01 7.119520e+01 -1.269935e+01
## mean_smoothness 5.755138e-05 1.150983e-03 5.437959e-04
## mean_compactness 2.063140e-04 7.087509e-03 1.443878e-03
## mean_concavity 1.839527e-04 1.373727e-02 3.474098e-03
## mean_concave_points 4.304948e-05 7.404969e-03 5.146358e-04
## mean_symmetry 9.134733e-05 2.236431e-03 1.973398e-03
## mean_fractal_dimension 4.948929e-05 -1.917584e-05 6.493176e-04
## se_radius -1.917584e-05 7.619724e-02 3.305964e-02
## se_texture 6.493176e-04 3.305964e-02 3.046812e-01
## se_perimeter 4.086837e-04 5.394188e-01 2.525033e-01
## se_area -3.219083e-02 1.189094e+01 2.866957e+00
## se_smoothness 8.554267e-06 1.380045e-04 6.587740e-04
## se_compactness 7.024720e-05 1.742678e-03 2.305972e-03
## se_concavity 9.474186e-05 2.760451e-03 3.264879e-03
## se_concave_points 1.477579e-05 8.749705e-04 7.874699e-04
## se_symmetry 1.990465e-05 5.408787e-04 1.885577e-03
## se_fractal_dimension 1.281108e-05 1.644951e-04 4.103517e-04
## worst_radius -8.928486e-03 9.490021e-01 -2.933045e-01
## worst_texture -1.994071e-03 3.427805e-01 1.384605e+00
## worst_perimeter -5.093144e-02 6.623880e+00 -1.856009e+00
## worst_area -9.656728e-01 1.174838e+02 -2.554974e+01
## worst_smoothness 8.070420e-05 8.638162e-04 -9.129688e-04
## worst_compactness 4.989919e-04 1.204749e-02 -7.810953e-03
## worst_concavity 4.985273e-04 2.152133e-02 -7.707941e-03
## worst_concave_points 7.727167e-05 9.513715e-03 -4.262841e-03
## worst_symmetry 1.413789e-04 1.417677e-03 -4.289981e-03
## worst_fractal_dimension 9.703487e-05 2.060610e-04 -4.364398e-04
## se_perimeter se_area se_smoothness se_compactness
## mean_radius 4.772965e+00 1.174046e+02 -2.355109e-03 1.286264e-02
## mean_texture 2.543855e+00 5.271029e+01 7.545494e-05 1.518357e-02
## mean_perimeter 3.380135e+01 8.187864e+02 -1.477929e-02 1.080202e-01
## mean_area 5.144223e+02 1.276197e+04 -1.761392e-01 1.327680e+00
## mean_smoothness 8.211561e-03 1.536177e-01 1.408523e-05 7.955419e-05
## mean_compactness 5.696347e-02 1.062030e+00 2.168893e-05 6.926883e-04
## mean_concavity 1.044943e-01 2.200860e+00 2.387351e-05 9.498062e-04
## mean_concave_points 5.485873e-02 1.201038e+00 3.338825e-06 3.373040e-04
## mean_symmetry 1.681490e-02 2.676738e-01 1.551484e-05 2.048459e-04
## mean_fractal_dimension 4.086837e-04 -3.219083e-02 8.554267e-06 7.024720e-05
## se_radius 5.394188e-01 1.189094e+01 1.380045e-04 1.742678e-03
## se_texture 2.525033e-01 2.866957e+00 6.587740e-04 2.305972e-03
## se_perimeter 4.037240e+00 8.525089e+01 9.252348e-04 1.486244e-02
## se_area 8.525089e+01 2.050496e+03 1.041094e-02 2.277507e-01
## se_smoothness 9.252348e-04 1.041094e-02 9.030286e-06 1.816268e-05
## se_compactness 1.486244e-02 2.277507e-01 1.816268e-05 3.202877e-04
## se_concavity 2.194114e-02 3.682852e-01 2.441974e-05 4.330025e-04
## se_concave_points 6.910675e-03 1.160842e-01 6.099966e-06 8.219549e-05
## se_symmetry 4.365829e-03 4.862480e-02 1.029205e-05 5.813967e-05
## se_fractal_dimension 1.284213e-03 1.484377e-02 3.404143e-06 3.803125e-05
## worst_radius 6.733017e+00 1.649966e+02 -3.343330e-03 1.736165e-02
## worst_texture 2.578768e+00 5.670466e+01 -1.391226e-03 1.611529e-02
## worst_perimeter 4.829089e+01 1.150203e+03 -2.187521e-02 1.538251e-01
## worst_area 8.311382e+02 2.082563e+04 -3.107221e-01 1.989014e+00
## worst_smoothness 5.712771e-03 1.245089e-01 2.162942e-05 9.190075e-05
## worst_compactness 1.048013e-01 1.948788e+00 -2.582572e-05 1.898788e-03
## worst_concavity 1.725605e-01 3.573414e+00 -3.608367e-05 2.373811e-03
## worst_concave_points 7.235189e-02 1.581957e+00 -1.999679e-05 5.635329e-04
## worst_symmetry 1.205598e-02 1.750265e-01 -1.978199e-05 3.013360e-04
## worst_fractal_dimension 2.771839e-03 7.453815e-03 5.552556e-06 1.900289e-04
## se_concavity se_concave_points se_symmetry
## mean_radius 2.054624e-02 8.166187e-03 -3.109091e-03
## mean_texture 1.898047e-02 4.420177e-03 4.744283e-04
## mean_perimeter 1.664018e-01 6.094047e-02 -1.694215e-02
## mean_area 2.196490e+00 8.073949e-01 -2.170701e-01
## mean_smoothness 1.047898e-04 3.293454e-05 2.301361e-05
## mean_compactness 9.044430e-04 2.084158e-04 9.767365e-05
## mean_concavity 1.658273e-03 3.351633e-04 1.139719e-04
## mean_concave_points 5.115180e-04 1.469513e-04 2.899277e-05
## mean_symmetry 2.816906e-04 6.620805e-05 1.009432e-04
## mean_fractal_dimension 9.474186e-05 1.477579e-05 1.990465e-05
## se_radius 2.760451e-03 8.749705e-04 5.408787e-04
## se_texture 3.264879e-03 7.874699e-04 1.885577e-03
## se_perimeter 2.194114e-02 6.910675e-03 4.365829e-03
## se_area 3.682852e-01 1.160842e-01 4.862480e-02
## se_smoothness 2.441974e-05 6.099966e-06 1.029205e-05
## se_compactness 4.330025e-04 8.219549e-05 5.813967e-05
## se_concavity 9.119629e-04 1.438500e-04 7.698160e-05
## se_concave_points 1.438500e-04 3.811024e-05 1.591348e-05
## se_symmetry 7.698160e-05 1.591348e-05 6.829438e-05
## se_fractal_dimension 5.810841e-05 9.976860e-06 8.047034e-06
## worst_radius 2.696517e-02 1.063349e-02 -5.280606e-03
## worst_texture 1.895267e-02 3.355469e-03 -3.803232e-03
## worst_perimeter 2.273502e-01 8.148585e-02 -3.016683e-02
## worst_area 3.198939e+00 1.196358e+00 -5.393287e-01
## worst_smoothness 1.151748e-04 3.017795e-05 -2.894072e-06
## worst_compactness 2.290961e-03 4.374838e-04 7.161077e-05
## worst_concavity 4.162923e-03 7.055574e-04 5.675643e-05
## worst_concave_points 8.697091e-04 2.436915e-04 -1.908251e-05
## worst_symmetry 3.634677e-04 5.350555e-05 1.966492e-04
## worst_fractal_dimension 2.385956e-04 3.442980e-05 1.109199e-05
## se_fractal_dimension worst_radius worst_texture
## mean_radius -4.146920e-04 1.648070e+01 6.501412e+00
## mean_texture 6.586149e-04 7.485610e+00 2.402127e+01
## mean_perimeter -4.863904e-04 1.135626e+02 4.579262e+01
## mean_area -2.001763e-02 1.634838e+03 6.280260e+02
## mean_smoothness 1.047957e-05 1.415769e-02 3.448643e-03
## mean_compactness 7.028679e-05 1.340950e-01 8.324134e-02
## mean_concavity 9.404932e-05 2.622474e-01 1.503095e-01
## mean_concave_points 2.607809e-05 1.544149e-01 7.139010e-02
## mean_symmetry 2.385292e-05 2.367461e-02 1.619689e-02
## mean_fractal_dimension 1.281108e-05 -8.928486e-03 -1.994071e-03
## se_radius 1.644951e-04 9.490021e-01 3.427805e-01
## se_texture 4.103517e-04 -2.933045e-01 1.384605e+00
## se_perimeter 1.284213e-03 6.733017e+00 2.578768e+00
## se_area 1.484377e-02 1.649966e+02 5.670466e+01
## se_smoothness 3.404143e-06 -3.343330e-03 -1.391226e-03
## se_compactness 3.803125e-05 1.736165e-02 1.611529e-02
## se_concavity 5.810841e-05 2.696517e-02 1.895267e-02
## se_concave_points 9.976860e-06 1.063349e-02 3.355469e-03
## se_symmetry 8.047034e-06 -5.280606e-03 -3.803232e-03
## se_fractal_dimension 7.003880e-06 -5.188800e-04 -1.668750e-05
## worst_radius -5.188800e-04 2.325477e+01 1.084515e+01
## worst_texture -1.668750e-05 1.084515e+01 3.772001e+01
## worst_perimeter -4.167831e-04 1.604270e+02 7.667725e+01
## worst_area -3.913711e-02 2.694301e+03 1.229169e+03
## worst_smoothness 1.019688e-05 2.346196e-02 3.213115e-02
## worst_compactness 1.609760e-04 3.558521e-01 3.556175e-01
## worst_concavity 2.082661e-04 5.726996e-01 4.796577e-01
## worst_concave_points 3.685808e-05 2.481787e-01 1.478239e-01
## worst_symmetry 1.749841e-05 7.021180e-02 9.127322e-02
## worst_fractal_dimension 2.816214e-05 7.613078e-03 2.488300e-02
## worst_perimeter worst_area worst_smoothness
## mean_radius 1.139623e+02 1.883788e+03 9.438014e-03
## mean_texture 5.305461e+01 8.606878e+02 8.094079e-03
## mean_perimeter 7.895129e+02 1.298711e+04 8.204788e-02
## mean_area 1.131450e+04 1.918354e+05 9.760221e-01
## mean_smoothness 1.100670e-01 1.613890e+00 2.578987e-04
## mean_compactness 1.025586e+00 1.500198e+01 6.740252e-04
## mean_concavity 1.928925e+00 3.031152e+01 8.072486e-04
## mean_concave_points 1.104569e+00 1.772095e+01 3.966553e-04
## mean_symmetry 1.939526e-01 2.648439e+00 2.643405e-04
## mean_fractal_dimension -5.093144e-02 -9.656728e-01 8.070420e-05
## se_radius 6.623880e+00 1.174838e+02 8.638162e-04
## se_texture -1.856009e+00 -2.554974e+01 -9.129688e-04
## se_perimeter 4.829089e+01 8.311382e+02 5.712771e-03
## se_area 1.150203e+03 2.082563e+04 1.245089e-01
## se_smoothness -2.187521e-02 -3.107221e-01 2.162942e-05
## se_compactness 1.538251e-01 1.989014e+00 9.190075e-05
## se_concavity 2.273502e-01 3.198939e+00 1.151748e-04
## se_concave_points 8.148585e-02 1.196358e+00 3.017795e-05
## se_symmetry -3.016683e-02 -5.393287e-01 -2.894072e-06
## se_fractal_dimension -4.167831e-04 -3.913711e-02 1.019688e-05
## worst_radius 1.604270e+02 2.694301e+03 2.346196e-02
## worst_texture 7.667725e+01 1.229169e+03 3.213115e-02
## worst_perimeter 1.120555e+03 1.858030e+04 1.779042e-01
## worst_area 1.858030e+04 3.224494e+05 2.663643e+00
## worst_smoothness 1.779042e-01 2.663643e+00 5.206670e-04
## worst_compactness 2.747660e+00 3.850478e+01 2.023059e-03
## worst_concavity 4.282344e+00 6.376714e+01 2.451114e-03
## worst_concave_points 1.785646e+00 2.771831e+01 8.154925e-04
## worst_symmetry 5.380056e-01 7.038101e+00 6.898575e-04
## worst_fractal_dimension 7.970614e-02 7.501753e-01 2.533044e-04
## worst_compactness worst_concavity worst_concave_points
## mean_radius 2.268464e-01 3.850688e-01 1.716677e-01
## mean_texture 1.948166e-01 2.775081e-01 8.601180e-02
## mean_perimeter 1.723145e+00 2.839595e+00 1.225807e+00
## mean_area 2.140320e+01 3.743191e+01 1.663904e+01
## mean_smoothness 1.031305e-03 1.261252e-03 4.600052e-04
## mean_compactness 7.081106e-03 8.875002e-03 2.790102e-03
## mean_concavity 9.332580e-03 1.456572e-02 4.465145e-03
## mean_concave_points 4.010709e-03 6.025305e-03 2.299381e-03
## mean_symmetry 2.000497e-03 2.437763e-03 7.605824e-04
## mean_fractal_dimension 4.989919e-04 4.985273e-04 7.727167e-05
## se_radius 1.204749e-02 2.152133e-02 9.513715e-03
## se_texture -7.810953e-03 -7.707941e-03 -4.262841e-03
## se_perimeter 1.048013e-01 1.725605e-01 7.235189e-02
## se_area 1.948788e+00 3.573414e+00 1.581957e+00
## se_smoothness -2.582572e-05 -3.608367e-05 -1.999679e-05
## se_compactness 1.898788e-03 2.373811e-03 5.635329e-04
## se_concavity 2.290961e-03 4.162923e-03 8.697091e-04
## se_concave_points 4.374838e-04 7.055574e-04 2.436915e-04
## se_symmetry 7.161077e-05 5.675643e-05 -1.908251e-05
## se_fractal_dimension 1.609760e-04 2.082661e-04 3.685808e-05
## worst_radius 3.558521e-01 5.726996e-01 2.481787e-01
## worst_texture 3.556175e-01 4.796577e-01 1.478239e-01
## worst_perimeter 2.747660e+00 4.282344e+00 1.785646e+00
## worst_area 3.850478e+01 6.376714e+01 2.771831e+01
## worst_smoothness 2.023059e-03 2.451114e-03 8.154925e-04
## worst_compactness 2.449950e-02 2.901986e-02 8.189874e-03
## worst_concavity 2.901986e-02 4.325925e-02 1.163442e-02
## worst_concave_points 8.189874e-03 1.163442e-02 4.288187e-03
## worst_symmetry 5.867956e-03 6.753297e-03 2.001936e-03
## worst_fractal_dimension 2.281726e-03 2.564193e-03 5.985573e-04
## worst_symmetry worst_fractal_dimension
## mean_radius 3.464846e-02 2.119824e-04
## mean_texture 3.066769e-02 9.826686e-03
## mean_perimeter 2.755397e-01 2.052601e-02
## mean_area 3.027352e+00 2.423656e-03
## mean_smoothness 3.370779e-04 1.256969e-04
## mean_compactness 1.617994e-03 6.461268e-04
## mean_concavity 1.959591e-03 7.296756e-04
## mean_concave_points 8.741219e-04 2.527607e-04
## mean_symmetry 1.170789e-03 2.137065e-04
## mean_fractal_dimension 1.413789e-04 9.703487e-05
## se_radius 1.417677e-03 2.060610e-04
## se_texture -4.289981e-03 -4.364398e-04
## se_perimeter 1.205598e-02 2.771839e-03
## se_area 1.750265e-01 7.453815e-03
## se_smoothness -1.978199e-05 5.552556e-06
## se_compactness 3.013360e-04 1.900289e-04
## se_concavity 3.634677e-04 2.385956e-04
## se_concave_points 5.350555e-05 3.442980e-05
## se_symmetry 1.966492e-04 1.109199e-05
## se_fractal_dimension 1.749841e-05 2.816214e-05
## worst_radius 7.021180e-02 7.613078e-03
## worst_texture 9.127322e-02 2.488300e-02
## worst_perimeter 5.380056e-01 7.970614e-02
## worst_area 7.038101e+00 7.501753e-01
## worst_smoothness 6.898575e-04 2.533044e-04
## worst_compactness 5.867956e-03 2.281726e-03
## worst_concavity 6.753297e-03 2.564193e-03
## worst_concave_points 2.001936e-03 5.985573e-04
## worst_symmetry 3.783260e-03 5.915541e-04
## worst_fractal_dimension 5.915541e-04 3.246261e-04
BC_data_d
## [1] 36.667197 23.587986 112.518389 26.801116 15.316434 15.131692
## [7] 21.579784 24.116227 144.087507 15.835311 31.651373 212.494449
## [13] 18.990047 38.923422 40.834902 17.716272 26.852218 43.225484
## [19] 8.807045 19.171669 16.083660 35.186789 42.512636 52.103396
## [25] 55.330558 48.565139 17.372671 63.909825 19.527496 40.192355
## [31] 74.196901 36.879241 47.557286 33.468788 53.488055 29.951792
## [37] 33.076086 166.813504 45.940652 14.525002 29.581374 79.999212
## [43] 20.065582 21.460275 31.930043 18.475442 21.314217 16.870705
## [49] 12.537252 8.640141 11.081646 8.353036 32.279053 17.025551
## [55] 9.718971 23.652443 29.441335 12.066498 34.906088 47.045653
## [61] 36.480597 42.820648 39.089492 27.071896 18.132008 11.960760
## [67] 12.964031 204.163191 9.053891 18.944406 180.997802 51.619864
## [73] 18.007802 9.797171 20.309723 54.314039 50.261261 166.481675
## [79] 8.093857 15.493664 31.914619 57.996254 121.789652 10.800407
## [85] 18.464826 18.157440 81.503022 7.645095 21.477422 9.363843
## [91] 25.858059 21.638014 7.953269 23.094160 18.060358 21.276553
## [97] 20.001961 15.205335 12.366376 17.099828 31.140292 14.390717
## [103] 18.874653 14.195865 52.460176 13.671968 13.991013 87.703228
## [109] 28.728496 15.529098 21.498910 69.422699 23.224978 32.405429
## [115] 13.064464 78.902296 18.016960 35.561899 57.616343 17.114034
## [121] 18.576523 180.235051 12.055470 13.714251 8.534301 28.910988
## [127] 17.371286 34.041954 42.054084 16.034471 18.593046 13.499616
## [133] 7.780729 15.622984 9.000889 26.521176 9.556351 92.985574
## [139] 37.548288 15.186098 10.482433 13.709361 9.421971 10.618210
## [145] 35.583307 56.290588 20.750952 10.839386 8.484166 31.440429
## [151] 47.027318 407.281422 6.470556 19.372757 5.881398 25.051525
## [157] 26.483863 7.265333 17.616248 22.867060 45.738700 43.455413
## [163] 8.648729 50.769829 8.593120 14.682473 8.760763 31.297537
## [169] 8.719040 8.993878 23.303286 32.652732 41.841917 19.763072
## [175] 17.079488 51.439484 14.148564 12.738971 12.271847 114.836085
## [181] 57.345254 17.604365 24.018885 15.772771 25.848054 16.162110
## [187] 10.159596 15.181586 10.665832 136.565036 41.273304 179.751929
## [193] 24.230384 32.118070 5.906184 31.365338 26.031409 46.970267
## [199] 36.361764 13.164590 16.717662 75.846605 50.317964 6.926243
## [205] 21.395483 12.511838 11.641920 33.425179 12.219284 43.115379
## [211] 3.769314 384.270077 195.365979 31.711997 14.733200 14.519925
## [217] 15.843525 16.421448 57.097027 12.728694 8.770411 6.320684
## [223] 24.940159 6.222561 12.395297 11.382751 17.176997 17.049395
## [229] 22.830308 31.131396 22.339761 28.090667 19.690154 22.598415
## [235] 9.797729 56.434154 29.974650 25.330203 83.429866 5.911516
## [241] 9.136950 40.729016 13.334693 37.617394 23.966606 16.261485
## [247] 45.876739 14.113270 6.741288 34.373060 7.364382 63.590826
## [253] 11.003199 29.742042 10.247246 61.877214 42.358093 111.628859
## [259] 42.807409 23.653490 13.409193 36.715027 18.058459 22.320302
## [265] 147.417525 19.228770 10.102311 12.754618 19.096065 12.471804
## [271] 6.048037 41.184330 21.865571 15.064117 56.363470 11.799692
## [277] 18.509471 8.484269 10.617139 30.999484 33.582910 27.720768
## [283] 17.508010 17.887351 8.385739 24.323579 10.242419 118.602249
## [289] 10.771496 198.402167 17.535833 18.673926 10.766471 7.697537
## [295] 7.537412 24.081654 23.881691 11.839186 31.026455 24.968612
## [301] 7.952909 51.866444 10.260615 8.071643 12.137848 16.319935
## [307] 28.652340 12.549339 9.447631 10.533826 7.510773 6.536269
## [313] 13.530545 107.097743 9.268292 10.777252 9.974760 68.479207
## [319] 27.296647 21.947082 13.604140 15.753985 57.236231 6.620470
## [325] 8.528651 9.779115 8.813885 12.060779 42.137081 16.428167
## [331] 16.490720 23.299316 11.649334 8.839215 31.389514 23.581687
## [337] 31.590820 10.947917 36.982144 9.164293 24.936416 10.876828
## [343] 34.165419 11.406886 29.531503 7.835953 14.006836 16.579419
## [349] 30.104684 9.130813 46.707102 75.717427 23.998415 17.256451
## [355] 14.305175 9.899024 10.330235 31.373195 27.616237 9.082854
## [361] 8.197965 7.857608 10.007260 6.538524 18.127290 41.142837
## [367] 6.523646 89.634579 45.515606 39.313162 11.503723 38.582228
## [373] 28.126988 9.103849 23.334030 73.252582 17.295187 18.273431
## [379] 109.820092 12.999115 12.832716 31.206006 27.661093 14.340886
## [385] 15.341622 9.752638 9.621680 26.200652 54.915406 8.834377
## [391] 46.741969 24.013561 40.494086 12.419250 12.089921 12.766116
## [397] 12.800060 9.866949 8.198265 76.777954 17.756611 12.581550
## [403] 12.872430 12.708060 10.751121 9.015515 21.415281 15.320827
## [409] 14.776513 43.816799 12.451719 18.547662 15.296034 17.794555
## [415] 15.253142 21.200067 52.689119 16.940206 10.118723 9.088551
## [421] 27.849975 12.865884 8.376759 35.963997 11.145466 9.577649
## [427] 14.473193 7.807181 7.978864 50.930062 19.473940 25.915100
## [433] 24.886236 7.149467 14.478616 9.721094 6.210904 6.380264
## [439] 7.036898 25.867067 24.252014 13.010991 55.210978 21.194870
## [445] 20.251328 20.807840 14.134999 13.749539 32.082829 21.081001
## [451] 24.887721 10.204475 16.852411 8.850729 20.919542 18.513568
## [457] 9.613669 9.467330 17.094967 30.772828 301.538886 15.404104
## [463] 10.841414 15.010132 54.065043 13.973575 14.032799 52.048900
## [469] 19.310975 18.366408 27.613582 11.350126 45.287871 17.233547
## [475] 12.659231 16.026020 13.675411 12.431026 17.941804 13.376819
## [481] 8.327706 15.243519 8.703592 19.811809 49.615019 10.821895
## [487] 25.438088 8.963553 56.095954 8.985001 17.537813 8.963215
## [493] 25.366779 12.032657 9.425982 12.991169 8.909391 20.817665
## [499] 24.037451 25.401774 27.885619 12.078650 58.188371 90.067123
## [505] 104.870556 26.281934 32.450820 17.090232 27.618727 17.548288
## [511] 8.463969 19.937371 9.146607 10.502094 9.689786 13.054305
## [517] 32.144437 30.988341 11.264036 35.771796 47.583571 9.055134
## [523] 13.712336 14.966901 20.568718 22.782114 11.318284 65.565319
## [529] 10.084803 16.669169 10.116316 11.210394 23.195931 13.301509
## [535] 29.603703 19.438959 30.155492 41.581324 69.477383 15.318609
## [541] 16.361906 12.780377 15.195602 13.698070 6.761204 7.578989
## [547] 13.855770 11.194511 15.540137 17.957272 14.290149 15.486300
## [553] 20.903709 15.306381 25.422980 17.903320 46.345839 17.253330
## [559] 25.766159 19.811145 62.726243 96.214378 55.809378 41.154514
## [565] 20.525661 13.441639 63.874806 70.552489
# t-tests, one by one. Benign vs Malignant
with(data=BC_data,t.test(mean_radius[Diagnosis=="M"],mean_radius[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: mean_radius[Diagnosis == "M"] and mean_radius[Diagnosis == "B"]
## t = 25.365, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 4.902325 5.725291
## sample estimates:
## mean of x mean of y
## 17.46033 12.14652
## the p-value (=p-value < 2.2e-16) is less than the significance level (typically 0.05), we can reject the null hypothesis that the mean radius of the two groups is equal. In other words, we can conclude that there is a significant difference in the mean radius between the benign and malignant groups.
with(data=BC_data,t.test(mean_texture[Diagnosis=="M"],mean_texture[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: mean_texture[Diagnosis == "M"] and mean_texture[Diagnosis == "B"]
## t = 11.079, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 3.079680 4.407004
## sample estimates:
## mean of x mean of y
## 21.65810 17.91476
## the p-value (=p-value < 2.2e-16) is less than the significance level (typically 0.05), we can reject the null hypothesis that the mean_texture of the two groups is equal. In other words, we can conclude that there is a significant difference in the mean_texture between the benign and malignant groups.
with(data=BC_data,t.test(mean_perimeter[Diagnosis=="M"],mean_perimeter[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: mean_perimeter[Diagnosis == "M"] and mean_perimeter[Diagnosis == "B"]
## t = 26.323, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 34.47484 40.03463
## sample estimates:
## mean of x mean of y
## 115.33014 78.07541
## the p-value (=p-value < 2.2e-16) is less than the significance level (typically 0.05), we can reject the null hypothesis that the mean_perimeter of the two groups is equal. In other words, we can conclude that there is a significant difference in the mean_perimeter between the benign and malignant groups.
with(data=BC_data,t.test(mean_area[Diagnosis=="M"],mean_area[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: mean_area[Diagnosis == "M"] and mean_area[Diagnosis == "B"]
## t = 23.877, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 473.0753 557.8827
## sample estimates:
## mean of x mean of y
## 978.2692 462.7902
## the p-value (=p-value < 2.2e-16) is less than the significance level (typically 0.05), we can reject the null hypothesis that the mean_area of the two groups is equal. In other words, we can conclude that there is a significant difference in the mean_area between the benign and malignant groups.
with(data=BC_data,t.test(mean_smoothness[Diagnosis=="M"],mean_smoothness[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: mean_smoothness[Diagnosis == "M"] and mean_smoothness[Diagnosis == "B"]
## t = 9.0713, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.008106919 0.012587834
## sample estimates:
## mean of x mean of y
## 0.10282502 0.09247765
with(data=BC_data,t.test(mean_compactness[Diagnosis=="M"],mean_compactness[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: mean_compactness[Diagnosis == "M"] and mean_compactness[Diagnosis == "B"]
## t = 17.639, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.05729614 0.07165509
## sample estimates:
## mean of x mean of y
## 0.14456024 0.08008462
with(data=BC_data,t.test(mean_concavity[Diagnosis=="M"],mean_concavity[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: mean_concavity[Diagnosis == "M"] and mean_concavity[Diagnosis == "B"]
## t = 23.038, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1043325 0.1237811
## sample estimates:
## mean of x mean of y
## 0.16011441 0.04605762
with(data=BC_data,t.test(mean_concave_points[Diagnosis=="M"],mean_concave_points[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: mean_concave_points[Diagnosis == "M"] and mean_concave_points[Diagnosis == "B"]
## t = 29.305, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.05783743 0.06614747
## sample estimates:
## mean of x mean of y
## 0.08770986 0.02571741
with(data=BC_data,t.test(mean_symmetry[Diagnosis=="M"],mean_symmetry[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: mean_symmetry[Diagnosis == "M"] and mean_symmetry[Diagnosis == "B"]
## t = 8.2416, df = 566, p-value = 1.188e-15
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01408401 0.02289756
## sample estimates:
## mean of x mean of y
## 0.1926768 0.1741860
##p-value = 1.188e-15
with(data=BC_data,t.test(mean_fractal_dimension[Diagnosis=="M"],mean_fractal_dimension[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: mean_fractal_dimension[Diagnosis == "M"] and mean_fractal_dimension[Diagnosis == "B"]
## t = -0.43066, df = 566, p-value = 0.6669
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.0014639990 0.0009374555
## sample estimates:
## mean of x mean of y
## 0.06260412 0.06286739
## p-value = 0.6669 In this case, the p-value is much larger than 0.05, indicating that we do not have enough evidence to reject the null hypothesis. Therefore, we fail to reject the null hypothesis and conclude that there is not enough evidence to support the alternative hypothesis.
with(data=BC_data,t.test(se_radius[Diagnosis=="M"],se_radius[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: se_radius[Diagnosis == "M"] and se_radius[Diagnosis == "B"]
## t = 16.307, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.2838289 0.3615657
## sample estimates:
## mean of x mean of y
## 0.6067796 0.2840824
## p-value < 2.2e-16
with(data=BC_data,t.test(se_texture[Diagnosis=="M"],se_texture[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: se_texture[Diagnosis == "M"] and se_texture[Diagnosis == "B"]
## t = -0.16712, df = 566, p-value = 0.8673
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.10224335 0.08620919
## sample estimates:
## mean of x mean of y
## 1.212363 1.220380
## p-value = 0.8673
with(data=BC_data,t.test(se_perimeter[Diagnosis=="M"],se_perimeter[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: se_perimeter[Diagnosis == "M"] and se_perimeter[Diagnosis == "B"]
## t = 15.849, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.017931 2.588858
## sample estimates:
## mean of x mean of y
## 4.303716 2.000321
## p-value < 2.2e-16
with(data=BC_data,t.test(se_area[Diagnosis=="M"],se_area[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: se_area[Diagnosis == "M"] and se_area[Diagnosis == "B"]
## t = 15.519, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 44.68016 57.62916
## sample estimates:
## mean of x mean of y
## 72.28981 21.13515
## p-value < 2.2e-16
with(data=BC_data,t.test(se_smoothness[Diagnosis=="M"],se_smoothness[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: se_smoothness[Diagnosis == "M"] and se_smoothness[Diagnosis == "B"]
## t = -1.5887, df = 566, p-value = 0.1127
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -9.258536e-04 9.785064e-05
## sample estimates:
## mean of x mean of y
## 0.006781900 0.007195902
## p-value = 0.1127
with(data=BC_data,t.test(se_compactness[Diagnosis=="M"],se_compactness[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: se_compactness[Diagnosis == "M"] and se_compactness[Diagnosis == "B"]
## t = 7.2326, df = 566, p-value = 1.546e-12
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.007840453 0.013686533
## sample estimates:
## mean of x mean of y
## 0.03220174 0.02143825
## p-value = 1.546e-12
with(data=BC_data,t.test(se_concavity[Diagnosis=="M"],se_concavity[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: se_concavity[Diagnosis == "M"] and se_concavity[Diagnosis == "B"]
## t = 6.2101, df = 566, p-value = 1.026e-09
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01078275 0.02075895
## sample estimates:
## mean of x mean of y
## 0.04176758 0.02599674
## p-value = 1.026e-09
with(data=BC_data,t.test(se_concave_points[Diagnosis=="M"],se_concave_points[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: se_concave_points[Diagnosis == "M"] and se_concave_points[Diagnosis == "B"]
## t = 10.61, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.004236499 0.006161465
## sample estimates:
## mean of x mean of y
## 0.015056635 0.009857653
## p-value < 2.2e-16
with(data=BC_data,t.test(se_symmetry[Diagnosis=="M"],se_symmetry[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: se_symmetry[Diagnosis == "M"] and se_symmetry[Diagnosis == "B"]
## t = -0.21818, df = 566, p-value = 0.8274
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.001567401 0.001253996
## sample estimates:
## mean of x mean of y
## 0.02042710 0.02058381
## p-value = 0.8274
with(data=BC_data,t.test(se_fractal_dimension[Diagnosis=="M"],se_fractal_dimension[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: se_fractal_dimension[Diagnosis == "M"] and se_fractal_dimension[Diagnosis == "B"]
## t = 1.815, df = 566, p-value = 0.07006
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.421657e-05 8.667302e-04
## sample estimates:
## mean of x mean of y
## 0.004052308 0.003636051
## p-value = 0.07006
with(data=BC_data,t.test(worst_radius[Diagnosis=="M"],worst_radius[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: worst_radius[Diagnosis == "M"] and worst_radius[Diagnosis == "B"]
## t = 29.244, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 7.215380 8.254401
## sample estimates:
## mean of x mean of y
## 21.11469 13.37980
## p-value < 2.2e-16
with(data=BC_data,t.test(worst_texture[Diagnosis=="M"],worst_texture[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: worst_texture[Diagnosis == "M"] and worst_texture[Diagnosis == "B"]
## t = 12.374, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 4.929805 6.790103
## sample estimates:
## mean of x mean of y
## 29.37502 23.51507
## p-value < 2.2e-16
with(data=BC_data,t.test(worst_perimeter[Diagnosis=="M"],worst_perimeter[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: worst_perimeter[Diagnosis == "M"] and worst_perimeter[Diagnosis == "B"]
## t = 29.895, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 50.60118 57.71785
## sample estimates:
## mean of x mean of y
## 141.16545 87.00594
## p-value < 2.2e-16
with(data=BC_data,t.test(worst_area[Diagnosis=="M"],worst_area[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: worst_area[Diagnosis == "M"] and worst_area[Diagnosis == "B"]
## t = 25.631, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 794.6115 926.5062
## sample estimates:
## mean of x mean of y
## 1419.4583 558.8994
## p-value < 2.2e-16
with(data=BC_data,t.test(worst_smoothness[Diagnosis=="M"],worst_smoothness[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: worst_smoothness[Diagnosis == "M"] and worst_smoothness[Diagnosis == "B"]
## t = 11.002, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01626794 0.02333904
## sample estimates:
## mean of x mean of y
## 0.1447630 0.1249595
## p-value < 2.2e-16
with(data=BC_data,t.test(worst_compactness[Diagnosis=="M"],worst_compactness[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: worst_compactness[Diagnosis == "M"] and worst_compactness[Diagnosis == "B"]
## t = 17.36, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1691889 0.2123581
## sample estimates:
## mean of x mean of y
## 0.3734460 0.1826725
## p-value < 2.2e-16
with(data=BC_data,t.test(worst_concavity[Diagnosis=="M"],worst_concavity[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: worst_concavity[Diagnosis == "M"] and worst_concavity[Diagnosis == "B"]
## t = 20.808, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.2564038 0.3098552
## sample estimates:
## mean of x mean of y
## 0.4493672 0.1662377
## p-value < 2.2e-16
with(data=BC_data,t.test(worst_concave_points[Diagnosis=="M"],worst_concave_points[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: worst_concave_points[Diagnosis == "M"] and worst_concave_points[Diagnosis == "B"]
## t = 30.987, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1005911 0.1142066
## sample estimates:
## mean of x mean of y
## 0.18184318 0.07444434
## p-value < 2.2e-16
with(data=BC_data,t.test(worst_symmetry[Diagnosis=="M"],worst_symmetry[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: worst_symmetry[Diagnosis == "M"] and worst_symmetry[Diagnosis == "B"]
## t = 10.801, df = 566, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.04301350 0.06213538
## sample estimates:
## mean of x mean of y
## 0.3228204 0.2702459
## p-value < 2.2e-16
with(data=BC_data,t.test(worst_fractal_dimension[Diagnosis=="M"],worst_fractal_dimension[Diagnosis=="B"],var.equal=TRUE))
##
## Two Sample t-test
##
## data: worst_fractal_dimension[Diagnosis == "M"] and worst_fractal_dimension[Diagnosis == "B"]
## t = 8.0631, df = 566, p-value = 4.452e-15
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.009045164 0.014871164
## sample estimates:
## mean of x mean of y
## 0.09140024 0.07944207
## p-value = 4.452e-15
library(Hotelling)
t2testBC_data <- hotelling.test(mean_radius + mean_texture + mean_perimeter + mean_area + mean_smoothness + mean_compactness + mean_concavity + mean_concave_points + mean_symmetry + mean_fractal_dimension + se_radius + se_texture + se_perimeter + se_area + se_smoothness + se_compactness + se_concavity + se_concave_points + se_symmetry + se_fractal_dimension + worst_radius + worst_texture + worst_perimeter + worst_area + worst_smoothness + worst_compactness + worst_concavity + worst_concave_points + worst_symmetry + worst_fractal_dimension ~ Diagnosis, data=BC_data)
t2testBC_data
## Test stat: 1934.8
## Numerator df: 30
## Denominator df: 537
## P-value: 0
# Output of the function hotelling.test is given
cat("T2 statistic =",t2testBC_data$stat[[1]],"\n")
## T2 statistic = 1934.799
print(t2testBC_data)
## Test stat: 1934.8
## Numerator df: 30
## Denominator df: 537
## P-value: 0
# The output of the hotelling.test function indicates that there is strong evidence against the null hypothesis, as the p-value is less than the significance level (alpha) of 0.05. Specifically, the output includes: The test statistic (T-squared): In this case, the test statistic is 1934.8. This measures the distance between the sample means of the groups relative to the within-group variability. A larger T-squared value indicates greater differences between the group means.The degrees of freedom: The numerator degrees of freedom is 30, and the denominator degrees of freedom is 537. These values depend on the number of groups being compared and the sample sizes of each group. The p-value: In this case, the p-value is 0. This is the probability of obtaining a test statistic as extreme or more extreme than the observed value, assuming the null hypothesis is true. A p-value of 0 indicates that the observed test statistic is very unlikely to have occurred by chance, and provides strong evidence against the null hypothesis.
# Since the p-value is less than alpha, we reject the null hypothesis and conclude that there are significant differences between the group means. However, it's important to note that the interpretation of the results depends on the specific context and the hypothesis being tested.
# Levene's tests based on absolute differences around means using t-tests. Standarizing the data set with scale()
matstand <- scale(BC_data[,3:32])
matMalignant <- matstand[Diagnosis == "M",]
matBenign <- matstand[Diagnosis == "B",]
vecmedianbenign <- apply(matBenign, 2, median)
# in the above 2 represents column. Hence, we are asking for column median
vecmedianmalignant <- apply(matMalignant, 2, median)
matabsdevbenign <- abs(matBenign - matrix(rep(vecmedianbenign,nrow(matBenign)),nrow=nrow(matBenign), byrow=TRUE))
matabsdevmalignant <- abs(matMalignant - matrix(rep(vecmedianmalignant,nrow(matMalignant)),nrow=nrow(matMalignant), byrow=TRUE))
matabsdev.all <- rbind(matabsdevbenign,matabsdevmalignant)
matabsdev.all <- data.frame(Diagnosis, matabsdev.all)
t.test(matabsdev.all$mean_radius[Diagnosis == "M"],matabsdev.all$mean_radius[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$mean_radius[Diagnosis == "M"] and matabsdev.all$mean_radius[Diagnosis == "B"]
## t = -1.7461, df = 566, p-value = 0.04067
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.003737858
## sample estimates:
## mean of x mean of y
## 0.4845537 0.5507960
## The null hypothesis for this test is that the mean radius measurements of malignant tumor samples are equal to the mean radius measurements of benign tumor samples. The alternative hypothesis is that the mean radius measurements of malignant tumor samples are less than the mean radius measurements of benign tumor samples.
#The p-value is 0.04067, which is less than the commonly used significance level of 0.05. This means that we can reject the null hypothesis and conclude that the mean radius measurements of malignant tumor samples are significantly less than the mean radius measurements of benign tumor samples.
#The 95 percent confidence interval for the difference in means is (-Inf, -0.003737858), which means that we can be 95 percent confident that the true difference in means is between negative infinity and -0.003737858.
#The sample estimates for the mean radius measurements of malignant tumor samples and benign tumor samples are 0.4845537 and 0.5507960, respectively. This means that, on average, the mean radius measurements of benign tumor samples are higher than those of malignant tumor samples.
t.test(matabsdev.all$mean_texture[Diagnosis == "M"],matabsdev.all$mean_texture[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$mean_texture[Diagnosis == "M"] and matabsdev.all$mean_texture[Diagnosis == "B"]
## t = -1.7518, df = 566, p-value = 0.04018
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.005508984
## sample estimates:
## mean of x mean of y
## 0.6195664 0.7121570
t.test(matabsdev.all$mean_perimeter[Diagnosis == "M"],matabsdev.all$mean_perimeter[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$mean_perimeter[Diagnosis == "M"] and matabsdev.all$mean_perimeter[Diagnosis == "B"]
## t = -1.7273, df = 566, p-value = 0.04233
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.003014429
## sample estimates:
## mean of x mean of y
## 0.4686461 0.5339428
t.test(matabsdev.all$mean_area[Diagnosis == "M"],matabsdev.all$mean_area[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$mean_area[Diagnosis == "M"] and matabsdev.all$mean_area[Diagnosis == "B"]
## t = -2.6434, df = 566, p-value = 0.004218
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.04407463
## sample estimates:
## mean of x mean of y
## 0.4184596 0.5354555
t.test(matabsdev.all$mean_smoothness[Diagnosis == "M"],matabsdev.all$mean_smoothness[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$mean_smoothness[Diagnosis == "M"] and matabsdev.all$mean_smoothness[Diagnosis == "B"]
## t = -0.1032, df = 566, p-value = 0.4589
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.07824574
## sample estimates:
## mean of x mean of y
## 0.7335777 0.7388066
t.test(matabsdev.all$mean_compactness[Diagnosis == "M"],matabsdev.all$mean_compactness[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$mean_compactness[Diagnosis == "M"] and matabsdev.all$mean_compactness[Diagnosis == "B"]
## t = -2.0639, df = 566, p-value = 0.01974
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.02040952
## sample estimates:
## mean of x mean of y
## 0.5290255 0.6301977
t.test(matabsdev.all$mean_concavity[Diagnosis == "M"],matabsdev.all$mean_concavity[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$mean_concavity[Diagnosis == "M"] and matabsdev.all$mean_concavity[Diagnosis == "B"]
## t = -2.1737, df = 566, p-value = 0.01507
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.02493922
## sample estimates:
## mean of x mean of y
## 0.4136701 0.5167088
t.test(matabsdev.all$mean_concave_points[Diagnosis == "M"],matabsdev.all$mean_concave_points[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$mean_concave_points[Diagnosis == "M"] and matabsdev.all$mean_concave_points[Diagnosis == "B"]
## t = -1.1928, df = 566, p-value = 0.1167
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.01795869
## sample estimates:
## mean of x mean of y
## 0.4093418 0.4564448
t.test(matabsdev.all$mean_symmetry[Diagnosis == "M"],matabsdev.all$mean_symmetry[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$mean_symmetry[Diagnosis == "M"] and matabsdev.all$mean_symmetry[Diagnosis == "B"]
## t = -0.79298, df = 566, p-value = 0.2141
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.04569144
## sample estimates:
## mean of x mean of y
## 0.6974537 0.7398520
t.test(matabsdev.all$mean_fractal_dimension[Diagnosis == "M"],matabsdev.all$mean_fractal_dimension[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$mean_fractal_dimension[Diagnosis == "M"] and matabsdev.all$mean_fractal_dimension[Diagnosis == "B"]
## t = -0.23818, df = 566, p-value = 0.4059
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.08549341
## sample estimates:
## mean of x mean of y
## 0.7279925 0.7424409
t.test(matabsdev.all$se_radius[Diagnosis == "M"],matabsdev.all$se_radius[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$se_radius[Diagnosis == "M"] and matabsdev.all$se_radius[Diagnosis == "B"]
## t = -1.701, df = 566, p-value = 0.04475
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.00310138
## sample estimates:
## mean of x mean of y
## 0.4440878 0.5428160
t.test(matabsdev.all$se_texture[Diagnosis == "M"],matabsdev.all$se_texture[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$se_texture[Diagnosis == "M"] and matabsdev.all$se_texture[Diagnosis == "B"]
## t = 1.0551, df = 566, p-value = 0.8541
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.1666405
## sample estimates:
## mean of x mean of y
## 0.7687200 0.7036643
t.test(matabsdev.all$se_perimeter[Diagnosis == "M"],matabsdev.all$se_perimeter[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$se_perimeter[Diagnosis == "M"] and matabsdev.all$se_perimeter[Diagnosis == "B"]
## t = -1.746, df = 566, p-value = 0.04068
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.005976637
## sample estimates:
## mean of x mean of y
## 0.4270034 0.5330371
t.test(matabsdev.all$se_area[Diagnosis == "M"],matabsdev.all$se_area[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$se_area[Diagnosis == "M"] and matabsdev.all$se_area[Diagnosis == "B"]
## t = -2.3586, df = 566, p-value = 0.009342
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.04725161
## sample estimates:
## mean of x mean of y
## 0.2844177 0.4411603
t.test(matabsdev.all$se_smoothness[Diagnosis == "M"],matabsdev.all$se_smoothness[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$se_smoothness[Diagnosis == "M"] and matabsdev.all$se_smoothness[Diagnosis == "B"]
## t = 0.70231, df = 566, p-value = 0.7586
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.1554153
## sample estimates:
## mean of x mean of y
## 0.7071745 0.6607249
t.test(matabsdev.all$se_compactness[Diagnosis == "M"],matabsdev.all$se_compactness[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$se_compactness[Diagnosis == "M"] and matabsdev.all$se_compactness[Diagnosis == "B"]
## t = -1.2939, df = 566, p-value = 0.09811
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.02338511
## sample estimates:
## mean of x mean of y
## 0.5796344 0.6652031
t.test(matabsdev.all$se_concavity[Diagnosis == "M"],matabsdev.all$se_concavity[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$se_concavity[Diagnosis == "M"] and matabsdev.all$se_concavity[Diagnosis == "B"]
## t = -0.66881, df = 566, p-value = 0.2519
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.07200066
## sample estimates:
## mean of x mean of y
## 0.4862588 0.5354594
t.test(matabsdev.all$se_concave_points[Diagnosis == "M"],matabsdev.all$se_concave_points[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$se_concave_points[Diagnosis == "M"] and matabsdev.all$se_concave_points[Diagnosis == "B"]
## t = 0.66471, df = 566, p-value = 0.7467
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.1350191
## sample estimates:
## mean of x mean of y
## 0.6563084 0.6174943
t.test(matabsdev.all$se_symmetry[Diagnosis == "M"],matabsdev.all$se_symmetry[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$se_symmetry[Diagnosis == "M"] and matabsdev.all$se_symmetry[Diagnosis == "B"]
## t = -0.87187, df = 566, p-value = 0.1918
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.05280659
## sample estimates:
## mean of x mean of y
## 0.6308683 0.6902229
t.test(matabsdev.all$se_fractal_dimension[Diagnosis == "M"],matabsdev.all$se_fractal_dimension[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$se_fractal_dimension[Diagnosis == "M"] and matabsdev.all$se_fractal_dimension[Diagnosis == "B"]
## t = 1.2145, df = 566, p-value = 0.8875
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.2114866
## sample estimates:
## mean of x mean of y
## 0.6358429 0.5460992
t.test(matabsdev.all$worst_radius[Diagnosis == "M"],matabsdev.all$worst_radius[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$worst_radius[Diagnosis == "M"] and matabsdev.all$worst_radius[Diagnosis == "B"]
## t = -2.5217, df = 566, p-value = 0.005976
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.03237456
## sample estimates:
## mean of x mean of y
## 0.4091166 0.5025075
t.test(matabsdev.all$worst_texture[Diagnosis == "M"],matabsdev.all$worst_texture[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$worst_texture[Diagnosis == "M"] and matabsdev.all$worst_texture[Diagnosis == "B"]
## t = -1.6553, df = 566, p-value = 0.04921
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.000375359
## sample estimates:
## mean of x mean of y
## 0.6419832 0.7226258
t.test(matabsdev.all$worst_perimeter[Diagnosis == "M"],matabsdev.all$worst_perimeter[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$worst_perimeter[Diagnosis == "M"] and matabsdev.all$worst_perimeter[Diagnosis == "B"]
## t = -2.6433, df = 566, p-value = 0.004219
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.03652691
## sample estimates:
## mean of x mean of y
## 0.3979253 0.4948913
t.test(matabsdev.all$worst_area[Diagnosis == "M"],matabsdev.all$worst_area[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$worst_area[Diagnosis == "M"] and matabsdev.all$worst_area[Diagnosis == "B"]
## t = -3.2923, df = 566, p-value = 0.0005278
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.07605857
## sample estimates:
## mean of x mean of y
## 0.3403765 0.4926226
t.test(matabsdev.all$worst_smoothness[Diagnosis == "M"],matabsdev.all$worst_smoothness[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$worst_smoothness[Diagnosis == "M"] and matabsdev.all$worst_smoothness[Diagnosis == "B"]
## t = 0.019681, df = 566, p-value = 0.5078
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.08214453
## sample estimates:
## mean of x mean of y
## 0.7099286 0.7089589
t.test(matabsdev.all$worst_compactness[Diagnosis == "M"],matabsdev.all$worst_compactness[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$worst_compactness[Diagnosis == "M"] and matabsdev.all$worst_compactness[Diagnosis == "B"]
## t = -1.8218, df = 566, p-value = 0.0345
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.008626281
## sample estimates:
## mean of x mean of y
## 0.5217852 0.6119643
t.test(matabsdev.all$worst_concavity[Diagnosis == "M"],matabsdev.all$worst_concavity[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$worst_concavity[Diagnosis == "M"] and matabsdev.all$worst_concavity[Diagnosis == "B"]
## t = -1.735, df = 566, p-value = 0.04165
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.004240361
## sample estimates:
## mean of x mean of y
## 0.4767373 0.5609101
t.test(matabsdev.all$worst_concave_points[Diagnosis == "M"],matabsdev.all$worst_concave_points[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$worst_concave_points[Diagnosis == "M"] and matabsdev.all$worst_concave_points[Diagnosis == "B"]
## t = -1.2311, df = 566, p-value = 0.1094
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.01365571
## sample estimates:
## mean of x mean of y
## 0.4518694 0.4922350
t.test(matabsdev.all$worst_symmetry[Diagnosis == "M"],matabsdev.all$worst_symmetry[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$worst_symmetry[Diagnosis == "M"] and matabsdev.all$worst_symmetry[Diagnosis == "B"]
## t = -1.0991, df = 566, p-value = 0.1361
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.03033607
## sample estimates:
## mean of x mean of y
## 0.6238138 0.6846072
t.test(matabsdev.all$worst_fractal_dimension[Diagnosis == "M"],matabsdev.all$worst_fractal_dimension[Diagnosis == "B"], alternative="less",var.equal = TRUE)
##
## Two Sample t-test
##
## data: matabsdev.all$worst_fractal_dimension[Diagnosis == "M"] and matabsdev.all$worst_fractal_dimension[Diagnosis == "B"]
## t = -1.2337, df = 566, p-value = 0.1089
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.02436987
## sample estimates:
## mean of x mean of y
## 0.6348451 0.7074898
matstand.all <- data.frame(Diagnosis, matstand)
colnames(matstand.all) <- colnames(BC_data[2:32])
t2testcdata <- hotelling.test(mean_radius + mean_texture + mean_perimeter + mean_area + mean_smoothness + mean_compactness + mean_concavity + mean_concave_points + mean_symmetry + mean_fractal_dimension + se_radius + se_texture + se_perimeter + se_area + se_smoothness + se_compactness + se_concavity + se_concave_points + se_symmetry + se_fractal_dimension + worst_radius + worst_texture + worst_perimeter + worst_area + worst_smoothness + worst_compactness + worst_concavity + worst_concave_points + worst_symmetry + worst_fractal_dimension ~ Diagnosis, data=matstand.all)
cat("T2 statistic =",t2testcdata$stat[[1]],"\n")
## T2 statistic = 1934.799
print(t2testcdata)
## Test stat: 1934.8
## Numerator df: 30
## Denominator df: 537
## P-value: 0
# In the above we standardized using scale function
#matabsdev.all
# We can also look at Van Valen's test. Equivalent to the comparison of mean absolute median
# diferences between two groups. In the sparrows' example, the Van Valen's test
# is one-sided (Mean dij for survivors < Mean dij for non-survivors)
# dij is the norm of the individual vector i composed by the absolute
# deviations computed for all the variables in sample j.
# These norms define the second column of the data frame d.all
d.all <- data.frame(Diagnosis,sqrt(rowSums(matabsdev.all[,-1]^2)))
#d.all
colnames(d.all)[2] <- "dij"
#d.all
head(d.all)
## Diagnosis dij
## 1 M 2.497548
## 2 M 2.764340
## 3 M 3.071548
## 4 M 3.689073
## 5 M 2.993284
## 6 M 2.200929
with(d.all, t.test(dij[Diagnosis=="M"], dij[Diagnosis=="B"],var.equal=TRUE, alternative="less"))
##
## Two Sample t-test
##
## data: dij[Diagnosis == "M"] and dij[Diagnosis == "B"]
## t = -1.6824, df = 566, p-value = 0.04652
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.006935767
## sample estimates:
## mean of x mean of y
## 3.897171 4.231733
sprintf("d-values for Malignant: Mean = %2.3f, Variance = %2.3f",mean(d.all$dij[Diagnosis=="M"]),var(d.all$dij[Diagnosis=="M"]))
## [1] "d-values for Malignant: Mean = 3.897, Variance = 4.900"
sprintf("d-values for Benign: Mean = %2.3f, Variance = %2.3f",mean(d.all$dij[Diagnosis=="B"]),var(d.all$dij[Diagnosis=="B"]))
## [1] "d-values for Benign: Mean = 4.232, Variance = 5.448"
# Hotelling Test
# Leverne test is used to verify Homoscedasticity. It tests if the variance of two samples are # #equal. Levene's test is an inferential statistic used to assess the equality of variances for a #variable calculated for two or more groups.[1] Some common statistical procedures assume that #variances of the populations from which different samples are drawn are equal. Levene's test #assesses this assumption.
library(car)
#leveneTest() produces a two-sided test
leveneTest(mean_radius ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 91.591 < 2.2e-16 ***
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(mean_texture ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 1.0103 0.3153
## 566
leveneTest(mean_perimeter ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 91.88 < 2.2e-16 ***
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(mean_area ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 171.64 < 2.2e-16 ***
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(mean_smoothness ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.9135 0.3396
## 566
leveneTest(mean_compactness ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 37.938 1.386e-09 ***
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(mean_concavity ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 68.567 8.88e-16 ***
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(mean_concave_points ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 92.935 < 2.2e-16 ***
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(mean_symmetry ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 1.7553 0.1858
## 566
leveneTest(mean_fractal_dimension ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 5.5127 0.01922 *
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(se_radius ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 108.61 < 2.2e-16 ***
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(se_texture ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 5.5316 0.01902 *
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(se_perimeter ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 91.937 < 2.2e-16 ***
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(se_area ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 116.6 < 2.2e-16 ***
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(se_smoothness ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 4.5962 0.03247 *
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(se_compactness ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 3.6443 0.05677 .
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(se_concavity ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.028 0.8672
## 566
leveneTest(se_concave_points ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.1342 0.7143
## 566
leveneTest(se_symmetry ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 2.974 0.08516 .
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(se_fractal_dimension ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.5963 0.4403
## 566
leveneTest(worst_radius ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 131.08 < 2.2e-16 ***
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(worst_texture ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.1699 0.6804
## 566
leveneTest(worst_perimeter ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 125.8 < 2.2e-16 ***
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(worst_area ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 194.51 < 2.2e-16 ***
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(worst_smoothness ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.5165 0.4726
## 566
leveneTest(worst_compactness ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 58.72 7.939e-14 ***
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(worst_concavity ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 18.666 1.839e-05 ***
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(worst_concave_points ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 13.357 0.0002815 ***
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(worst_symmetry ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 37.331 1.856e-09 ***
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(worst_fractal_dimension ~ Diagnosis, data=BC_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 34.663 6.721e-09 ***
## 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA
summary(aov(mean_radius ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 3745 3745 643.4 <2e-16 ***
## Residuals 566 3294 6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_texture ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 1858 1858.3 122.7 <2e-16 ***
## Residuals 566 8570 15.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_perimeter ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 184063 184063 692.9 <2e-16 ***
## Residuals 566 150355 266
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_area ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 35239058 35239058 570.1 <2e-16 ***
## Residuals 566 34984077 61809
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_smoothness ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 0.01420 0.014199 82.29 <2e-16 ***
## Residuals 566 0.09766 0.000173
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_compactness ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 0.5513 0.5513 311.1 <2e-16 ***
## Residuals 566 1.0029 0.0018
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_concavity ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 1.725 1.7252 530.7 <2e-16 ***
## Residuals 566 1.840 0.0033
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_concave_points ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 0.5097 0.5097 858.8 <2e-16 ***
## Residuals 566 0.3359 0.0006
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_symmetry ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 0.0453 0.04534 67.92 1.19e-15 ***
## Residuals 566 0.3778 0.00067
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(mean_fractal_dimension ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 0.000009 9.190e-06 0.185 0.667
## Residuals 566 0.028051 4.956e-05
summary(aov(se_radius ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 13.81 13.810 265.9 <2e-16 ***
## Residuals 566 29.39 0.052
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(se_texture ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 0.01 0.00852 0.028 0.867
## Residuals 566 172.75 0.30520
summary(aov(se_perimeter ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 703.6 703.6 251.2 <2e-16 ***
## Residuals 566 1585.5 2.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(se_area ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 347035 347035 240.8 <2e-16 ***
## Residuals 566 815597 1441
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(se_smoothness ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 0.000023 2.273e-05 2.524 0.113
## Residuals 566 0.005097 9.006e-06
summary(aov(se_compactness ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 0.01536 0.015364 52.31 1.55e-12 ***
## Residuals 566 0.16624 0.000294
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(se_concavity ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 0.0330 0.03298 38.56 1.03e-09 ***
## Residuals 566 0.4841 0.00086
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(se_concave_points ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 0.003585 0.003585 112.6 <2e-16 ***
## Residuals 566 0.018024 0.000032
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(se_symmetry ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 0.00000 3.260e-06 0.048 0.827
## Residuals 566 0.03872 6.841e-05
summary(aov(se_fractal_dimension ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 0.000023 2.298e-05 3.294 0.0701 .
## Residuals 566 0.003948 6.976e-06
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_radius ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 7934 7934 855.2 <2e-16 ***
## Residuals 566 5251 9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_texture ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 4554 4554 153.1 <2e-16 ***
## Residuals 566 16833 30
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_perimeter ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 389002 389002 893.7 <2e-16 ***
## Residuals 566 246353 435
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_area ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 98211759 98211759 656.9 <2e-16 ***
## Residuals 566 84617041 149500
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_smoothness ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 0.05201 0.05201 121 <2e-16 ***
## Residuals 566 0.24321 0.00043
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_compactness ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 4.827 4.827 301.4 <2e-16 ***
## Residuals 566 9.065 0.016
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_concavity ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 10.63 10.631 433 <2e-16 ***
## Residuals 566 13.90 0.025
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_concave_points ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 1.5297 1.5297 960.2 <2e-16 ***
## Residuals 566 0.9017 0.0016
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_symmetry ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 0.3666 0.3666 116.7 <2e-16 ***
## Residuals 566 1.7785 0.0031
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(worst_fractal_dimension ~ Diagnosis))
## Df Sum Sq Mean Sq F value Pr(>F)
## Diagnosis 1 0.01896 0.018964 65.01 4.45e-15 ***
## Residuals 566 0.16510 0.000292
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# mahalanobis
library(stats)
BC_data_MD <- mahalanobis(BC_data_x, BC_data_cm, BC_data_S)
#BC_data_MD
BC_data$pvalues <- pchisq(BC_data_MD, df=3, lower.tail=FALSE)
#BC_data
# BoxM
library(biotools)
boxM(BC_data[,3:32],Diagnosis)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: BC_data[, 3:32]
## Chi-Sq (approx.) = 7018, df = 465, p-value < 2.2e-16
# The Box's M-test for Homogeneity of Covariance Matrices is a statistical test used to determine whether the covariance matrices of two or more groups are equal
# The test statistic for Box's M-test follows a chi-squared distribution with degrees of freedom equal to the product of the number of groups and the number of variables. In the output, the test statistic is reported as Chi-Sq (approx.) = 7018, with degrees of freedom (df) = 465. This indicates that there are 465 variables in the data and that the test has been performed on multiple groups.
#The p-value for the test is < 2.2e-16, which means that the observed test statistic is highly significant at conventional levels of significance. This indicates strong evidence against the null hypothesis of homogeneity of covariance matrices, meaning that the covariance matrices for the groups are not equal.
#Therefore, based on the results of the Box's M-test, it can be inferred that there is evidence of significant differences in the covariance matrices between the groups in the data.
# MANOVA
summary(manova(as.matrix(BC_data[,-2])~ Diagnosis))
## Df Pillai approx F num Df den Df Pr(>F)
## Diagnosis 1 0.77871 58.833 32 535 < 2.2e-16 ***
## Residuals 566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The output shows the results of the MANOVA with respect to the effect of the Diagnosis variable on the dependent variables.
#The first row of the output shows the results of the MANOVA, with the Pillai's trace statistic being 0.77383 and the approximate F statistic being 59.158. The denominator degrees of freedom (den Df) are 536, indicating that there are 536 observations in the dataset. The null hypothesis of the MANOVA is that there are no differences in the means of the dependent variables between the groups defined by the Diagnosis variable.
#The p-value for the MANOVA is reported as < 2.2e-16, which is highly significant. implying a strong evidence against the null hypothesis, indicating that there are significant differences in the means of the dependent variables between the Malignant and Benign groups defined by the Diagnosis variable.
#The second row of the output shows the results of the residuals, which indicate the variability in the dependent variables that is not explained by the Diagnosis variable. The residual degrees of freedom are 566, indicating that there are 566 observations in the dataset that are not accounted for by the Diagnosis variable.
# In summary, based on the results of the MANOVA, it can be inferred that there are significant differences in the means of the dependent variables between the groups defined by the Diagnosis variable in the BC_data dataset.
## PCA
#cor(BC_data[-2])
bca_pca <- prcomp(BC_data[,-2],scale=TRUE)
#bca_pca
summary(bca_pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 3.6524 2.3960 1.69017 1.4109 1.28656 1.11383 0.99173
## Proportion of Variance 0.4169 0.1794 0.08927 0.0622 0.05173 0.03877 0.03074
## Cumulative Proportion 0.4169 0.5963 0.68555 0.7478 0.79948 0.83825 0.86898
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.92636 0.81702 0.67952 0.63468 0.59112 0.54334 0.51050
## Proportion of Variance 0.02682 0.02086 0.01443 0.01259 0.01092 0.00923 0.00814
## Cumulative Proportion 0.89580 0.91666 0.93109 0.94367 0.95459 0.96382 0.97196
## PC15 PC16 PC17 PC18 PC19 PC20 PC21
## Standard deviation 0.49270 0.39480 0.30692 0.28141 0.24355 0.22812 0.22244
## Proportion of Variance 0.00759 0.00487 0.00294 0.00247 0.00185 0.00163 0.00155
## Cumulative Proportion 0.97955 0.98442 0.98736 0.98984 0.99169 0.99332 0.99487
## PC22 PC23 PC24 PC25 PC26 PC27 PC28
## Standard deviation 0.17626 0.17353 0.16603 0.15617 0.13476 0.12484 0.09023
## Proportion of Variance 0.00097 0.00094 0.00086 0.00076 0.00057 0.00049 0.00025
## Cumulative Proportion 0.99584 0.99678 0.99764 0.99840 0.99897 0.99946 0.99971
## PC29 PC30 PC31 PC32
## Standard deviation 0.08256 0.03995 0.02734 0.01135
## Proportion of Variance 0.00021 0.00005 0.00002 0.00000
## Cumulative Proportion 0.99992 0.99997 1.00000 1.00000
(eigen_bca <- bca_pca$sdev^2)
## [1] 1.334011e+01 5.740679e+00 2.856671e+00 1.990515e+00 1.655248e+00
## [6] 1.240621e+00 9.835331e-01 8.581344e-01 6.675166e-01 4.617481e-01
## [11] 4.028135e-01 3.494252e-01 2.952150e-01 2.606132e-01 2.427493e-01
## [16] 1.558668e-01 9.419889e-02 7.918881e-02 5.931593e-02 5.203975e-02
## [21] 4.948143e-02 3.106843e-02 3.011234e-02 2.756705e-02 2.439053e-02
## [26] 1.816117e-02 1.558456e-02 8.140586e-03 6.816197e-03 1.596090e-03
## [31] 7.474333e-04 1.288493e-04
names(eigen_bca) <- paste("PC",1:8,sep="")
#eigen_bca
sumlambdas <- sum(eigen_bca)
#sumlambdas
propvar <- eigen_bca/sumlambdas
#propvar
cumvar_bca <- cumsum(propvar)
#cumvar_bca
matlambdas <- rbind(eigen_bca,propvar,cumvar_bca)
rownames(matlambdas) <- c("Eigenvalues","Prop. variance","Cum. prop. variance")
round(matlambdas,4)
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## Eigenvalues 13.3401 5.7407 2.8567 1.9905 1.6552 1.2406 0.9835 0.8581
## Prop. variance 0.4169 0.1794 0.0893 0.0622 0.0517 0.0388 0.0307 0.0268
## Cum. prop. variance 0.4169 0.5963 0.6855 0.7477 0.7995 0.8382 0.8690 0.8958
## <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## Eigenvalues 0.6675 0.4617 0.4028 0.3494 0.2952 0.2606 0.2427 0.1559
## Prop. variance 0.0209 0.0144 0.0126 0.0109 0.0092 0.0081 0.0076 0.0049
## Cum. prop. variance 0.9167 0.9311 0.9437 0.9546 0.9638 0.9720 0.9795 0.9844
## <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## Eigenvalues 0.0942 0.0792 0.0593 0.0520 0.0495 0.0311 0.0301 0.0276
## Prop. variance 0.0029 0.0025 0.0019 0.0016 0.0015 0.0010 0.0009 0.0009
## Cum. prop. variance 0.9874 0.9898 0.9917 0.9933 0.9949 0.9958 0.9968 0.9976
## <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## Eigenvalues 0.0244 0.0182 0.0156 0.0081 0.0068 0.0016 7e-04 1e-04
## Prop. variance 0.0008 0.0006 0.0005 0.0003 0.0002 0.0000 0e+00 0e+00
## Cum. prop. variance 0.9984 0.9990 0.9995 0.9997 0.9999 1.0000 1e+00 1e+00
summary(bca_pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 3.6524 2.3960 1.69017 1.4109 1.28656 1.11383 0.99173
## Proportion of Variance 0.4169 0.1794 0.08927 0.0622 0.05173 0.03877 0.03074
## Cumulative Proportion 0.4169 0.5963 0.68555 0.7478 0.79948 0.83825 0.86898
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.92636 0.81702 0.67952 0.63468 0.59112 0.54334 0.51050
## Proportion of Variance 0.02682 0.02086 0.01443 0.01259 0.01092 0.00923 0.00814
## Cumulative Proportion 0.89580 0.91666 0.93109 0.94367 0.95459 0.96382 0.97196
## PC15 PC16 PC17 PC18 PC19 PC20 PC21
## Standard deviation 0.49270 0.39480 0.30692 0.28141 0.24355 0.22812 0.22244
## Proportion of Variance 0.00759 0.00487 0.00294 0.00247 0.00185 0.00163 0.00155
## Cumulative Proportion 0.97955 0.98442 0.98736 0.98984 0.99169 0.99332 0.99487
## PC22 PC23 PC24 PC25 PC26 PC27 PC28
## Standard deviation 0.17626 0.17353 0.16603 0.15617 0.13476 0.12484 0.09023
## Proportion of Variance 0.00097 0.00094 0.00086 0.00076 0.00057 0.00049 0.00025
## Cumulative Proportion 0.99584 0.99678 0.99764 0.99840 0.99897 0.99946 0.99971
## PC29 PC30 PC31 PC32
## Standard deviation 0.08256 0.03995 0.02734 0.01135
## Proportion of Variance 0.00021 0.00005 0.00002 0.00000
## Cumulative Proportion 0.99992 0.99997 1.00000 1.00000
#bca_pca$rotation
#print(bca_pca)
## Sample scores stored in sparrow_pca$x
#bca_pca$x
# Identifying the scores by their survival status
bcatyp_pca <- cbind(data.frame(Diagnosis),bca_pca$x)
#bcatyp_pca
# Means of scores for all the PC's classified by Survival status
tabmeansPC <- aggregate(bcatyp_pca[,2:33],by=list(Diagnosis=BC_data$Diagnosis),mean)
#tabmeansPC
tabmeansPC <- tabmeansPC[rev(order(tabmeansPC$Diagnosis)),]
#tabmeansPC
tabfmeans <- t(tabmeansPC[,-1])
#tabfmeans
colnames(tabfmeans) <- t(as.vector(tabmeansPC[1]$Diagnosis))
#tabfmeans
# Standard deviations of scores for all the PC's classified by Survival status
tabsdsPC <- aggregate(bcatyp_pca[,2:33],by=list(Diagnosis=BC_data$Diagnosis),sd)
tabfsds <- t(tabsdsPC[,-1])
colnames(tabfsds) <- t(as.vector(tabsdsPC[1]$Diagnosis))
#tabfsds
t.test(PC1~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC1 by BC_data$Diagnosis
## t = -26.369, df = 286.41, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -6.382975 -5.496271
## sample estimates:
## mean in group B mean in group M
## -2.206444 3.733178
##p-value < 2.2e-16
t.test(PC2~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC2 by BC_data$Diagnosis
## t = 4.222, df = 356.65, p-value = 3.077e-05
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## 0.4930636 1.3529570
## sample estimates:
## mean in group B mean in group M
## 0.3428788 -0.5801315
t.test(PC3~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC3 by BC_data$Diagnosis
## t = 3.6262, df = 319.6, p-value = 0.0003346
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## 0.2659600 0.8968407
## sample estimates:
## mean in group B mean in group M
## 0.2159780 -0.3654224
t.test(PC4~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC4 by BC_data$Diagnosis
## t = 3.2183, df = 452.33, p-value = 0.001382
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## 0.1510156 0.6247041
## sample estimates:
## mean in group B mean in group M
## 0.1440818 -0.2437781
t.test(PC5~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC5 by BC_data$Diagnosis
## t = -2.5166, df = 534.03, p-value = 0.01214
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.46426316 -0.05721288
## sample estimates:
## mean in group B mean in group M
## -0.09685867 0.16387935
t.test(PC6~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC6 by BC_data$Diagnosis
## t = 0.15771, df = 337.54, p-value = 0.8748
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.1904591 0.2236619
## sample estimates:
## mean in group B mean in group M
## 0.00616707 -0.01043433
t.test(PC7~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC7 by BC_data$Diagnosis
## t = 0.70574, df = 409.97, p-value = 0.4808
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.1110673 0.2354837
## sample estimates:
## mean in group B mean in group M
## 0.02310902 -0.03909915
t.test(PC8~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC8 by BC_data$Diagnosis
## t = 1.5201, df = 485.35, p-value = 0.1291
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.02918548 0.22869557
## sample estimates:
## mean in group B mean in group M
## 0.03705689 -0.06269815
t.test(PC9~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC9 by BC_data$Diagnosis
## t = -1.673, df = 306.95, p-value = 0.09536
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.28912604 0.02340931
## sample estimates:
## mean in group B mean in group M
## -0.04935408 0.08350429
t.test(PC10~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC10 by BC_data$Diagnosis
## t = -1.4525, df = 414.02, p-value = 0.1471
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.20550940 0.03085812
## sample estimates:
## mean in group B mean in group M
## -0.03243963 0.05488601
t.test(PC11~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC11 by BC_data$Diagnosis
## t = 0.64122, df = 367.72, p-value = 0.5218
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.07733515 0.15217372
## sample estimates:
## mean in group B mean in group M
## 0.01390047 -0.02351881
t.test(PC12~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC12 by BC_data$Diagnosis
## t = -0.063141, df = 456.18, p-value = 0.9497
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.10304968 0.09663385
## sample estimates:
## mean in group B mean in group M
## -0.001191673 0.002016243
t.test(PC13~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC13 by BC_data$Diagnosis
## t = -0.27121, df = 435.23, p-value = 0.7864
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.1060410 0.0803243
## sample estimates:
## mean in group B mean in group M
## -0.004776610 0.008081752
t.test(PC14~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC14 by BC_data$Diagnosis
## t = -0.068032, df = 440.9, p-value = 0.9458
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.09021088 0.08417441
## sample estimates:
## mean in group B mean in group M
## -0.001121210 0.001897024
t.test(PC15~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC15 by BC_data$Diagnosis
## t = 2.0301, df = 301.31, p-value = 0.04322
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## 0.002996614 0.192382768
## sample estimates:
## mean in group B mean in group M
## 0.03628966 -0.06140003
t.test(PC16~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC16 by BC_data$Diagnosis
## t = 1.7652, df = 412.53, p-value = 0.07826
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.007004857 0.130353268
## sample estimates:
## mean in group B mean in group M
## 0.02291066 -0.03876354
t.test(PC17~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC17 by BC_data$Diagnosis
## t = 2.3717, df = 353.93, p-value = 0.01824
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## 0.01150734 0.12325890
## sample estimates:
## mean in group B mean in group M
## 0.02503140 -0.04235171
t.test(PC18~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC18 by BC_data$Diagnosis
## t = 0.05802, df = 390.24, p-value = 0.9538
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.04848328 0.05143186
## sample estimates:
## mean in group B mean in group M
## 0.0005476693 -0.0009266253
t.test(PC19~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC19 by BC_data$Diagnosis
## t = -0.58216, df = 287.14, p-value = 0.5609
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.06194417 0.03366554
## sample estimates:
## mean in group B mean in group M
## -0.005252457 0.008886858
t.test(PC20~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC20 by BC_data$Diagnosis
## t = 1.5089, df = 291.38, p-value = 0.1324
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.01037032 0.07852305
## sample estimates:
## mean in group B mean in group M
## 0.01265865 -0.02141771
t.test(PC21~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC21 by BC_data$Diagnosis
## t = 1.2616, df = 300.8, p-value = 0.2081
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.01539713 0.07039938
## sample estimates:
## mean in group B mean in group M
## 0.01021609 -0.01728504
t.test(PC22~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC22 by BC_data$Diagnosis
## t = 1.2681, df = 336.19, p-value = 0.2056
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.01163875 0.05387201
## sample estimates:
## mean in group B mean in group M
## 0.007844382 -0.013272248
t.test(PC23~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC23 by BC_data$Diagnosis
## t = 0.99216, df = 377.55, p-value = 0.3218
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.01540649 0.04679034
## sample estimates:
## mean in group B mean in group M
## 0.005829218 -0.009862706
t.test(PC24~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC24 by BC_data$Diagnosis
## t = 0.59476, df = 350.74, p-value = 0.5524
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.02126002 0.03969238
## sample estimates:
## mean in group B mean in group M
## 0.003423616 -0.005792563
t.test(PC25~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC25 by BC_data$Diagnosis
## t = -0.50442, df = 302.95, p-value = 0.6143
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.03780270 0.02237662
## sample estimates:
## mean in group B mean in group M
## -0.002865232 0.004847809
t.test(PC26~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC26 by BC_data$Diagnosis
## t = -1.6381, df = 319.92, p-value = 0.1024
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.046592993 0.004255987
## sample estimates:
## mean in group B mean in group M
## -0.007863652 0.013304851
t.test(PC27~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC27 by BC_data$Diagnosis
## t = 0.18981, df = 305.6, p-value = 0.8496
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.02167276 0.02630018
## sample estimates:
## mean in group B mean in group M
## 0.0008594953 -0.0014542171
t.test(PC28~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC28 by BC_data$Diagnosis
## t = 1.2535, df = 304.81, p-value = 0.211
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.006287387 0.028356092
## sample estimates:
## mean in group B mean in group M
## 0.004099029 -0.006935324
t.test(PC29~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC29 by BC_data$Diagnosis
## t = 0.77703, df = 310.83, p-value = 0.4377
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.009537637 0.021986910
## sample estimates:
## mean in group B mean in group M
## 0.002312321 -0.003912316
t.test(PC30~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC30 by BC_data$Diagnosis
## t = 0.28563, df = 263, p-value = 0.7754
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.006925811 0.009276098
## sample estimates:
## mean in group B mean in group M
## 0.0004365410 -0.0007386026
t.test(PC31~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC31 by BC_data$Diagnosis
## t = 0.89066, df = 269.07, p-value = 0.3739
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.003007261 0.007975776
## sample estimates:
## mean in group B mean in group M
## 0.0009228492 -0.0015614084
t.test(PC32~BC_data$Diagnosis,data=bcatyp_pca)
##
## Welch Two Sample t-test
##
## data: PC32 by BC_data$Diagnosis
## t = 0.52676, df = 281, p-value = 0.5988
## alternative hypothesis: true difference in means between group B and group M is not equal to 0
## 95 percent confidence interval:
## -0.001644605 0.002846424
## sample estimates:
## mean in group B mean in group M
## 0.0002232252 -0.0003776844
## F ratio tests
var.test(PC1~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC1 by BC_data$Diagnosis
## F = 0.30251, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.2366496 0.3837642
## sample estimates:
## ratio of variances
## 0.3025121
var.test(PC2~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC2 by BC_data$Diagnosis
## F = 0.59095, num df = 356, denom df = 210, p-value = 1.327e-05
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.4622905 0.7496760
## sample estimates:
## ratio of variances
## 0.5909516
var.test(PC3~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC3 by BC_data$Diagnosis
## F = 0.43579, num df = 356, denom df = 210, p-value = 5.047e-12
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.3409085 0.5528362
## sample estimates:
## ratio of variances
## 0.4357874
var.test(PC4~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC4 by BC_data$Diagnosis
## F = 1.0673, num df = 356, denom df = 210, p-value = 0.6053
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.8349345 1.3539763
## sample estimates:
## ratio of variances
## 1.067307
var.test(PC5~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC5 by BC_data$Diagnosis
## F = 1.7665, num df = 356, denom df = 210, p-value = 7.715e-06
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 1.381905 2.240974
## sample estimates:
## ratio of variances
## 1.766506
var.test(PC6~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC6 by BC_data$Diagnosis
## F = 0.5098, num df = 356, denom df = 210, p-value = 2.254e-08
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.3988079 0.6467291
## sample estimates:
## ratio of variances
## 0.509801
var.test(PC7~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC7 by BC_data$Diagnosis
## F = 0.83695, num df = 356, denom df = 210, p-value = 0.1427
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.6547291 1.0617453
## sample estimates:
## ratio of variances
## 0.8369482
var.test(PC8~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC8 by BC_data$Diagnosis
## F = 8.0262, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 6.278731 10.181940
## sample estimates:
## ratio of variances
## 8.026178
var.test(PC9~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC9 by BC_data$Diagnosis
## F = 0.38454, num df = 356, denom df = 210, p-value = 1.878e-15
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.3008210 0.4878281
## sample estimates:
## ratio of variances
## 0.3845431
var.test(PC10~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC10 by BC_data$Diagnosis
## F = 0.85729, num df = 356, denom df = 210, p-value = 0.2046
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.6706437 1.0875532
## sample estimates:
## ratio of variances
## 0.857292
var.test(PC11~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC11 by BC_data$Diagnosis
## F = 0.63934, num df = 356, denom df = 210, p-value = 0.0002167
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.500140 0.811055
## sample estimates:
## ratio of variances
## 0.6393351
var.test(PC12~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC12 by BC_data$Diagnosis
## F = 1.0906, num df = 356, denom df = 210, p-value = 0.4896
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.8531713 1.3835502
## sample estimates:
## ratio of variances
## 1.090619
var.test(PC13~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC13 by BC_data$Diagnosis
## F = 0.9691, num df = 356, denom df = 210, p-value = 0.7904
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.7581115 1.2293959
## sample estimates:
## ratio of variances
## 0.9691032
var.test(PC14~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC14 by BC_data$Diagnosis
## F = 1.0007, num df = 356, denom df = 210, p-value = 0.9963
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.7828504 1.2695138
## sample estimates:
## ratio of variances
## 1.000727
var.test(PC15~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC15 by BC_data$Diagnosis
## F = 0.36187, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.2830879 0.4590710
## sample estimates:
## ratio of variances
## 0.3618746
var.test(PC16~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC16 by BC_data$Diagnosis
## F = 0.84976, num df = 356, denom df = 210, p-value = 0.1799
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.664755 1.078004
## sample estimates:
## ratio of variances
## 0.8497644
var.test(PC17~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC17 by BC_data$Diagnosis
## F = 0.57925, num df = 356, denom df = 210, p-value = 6.103e-06
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.4531336 0.7348267
## sample estimates:
## ratio of variances
## 0.5792462
var.test(PC18~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC18 by BC_data$Diagnosis
## F = 0.74173, num df = 356, denom df = 210, p-value = 0.01376
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5802426 0.9409538
## sample estimates:
## ratio of variances
## 0.7417312
var.test(PC19~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC19 by BC_data$Diagnosis
## F = 0.30542, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.2389225 0.3874500
## sample estimates:
## ratio of variances
## 0.3054175
var.test(PC20~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC20 by BC_data$Diagnosis
## F = 0.32225, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.2520934 0.4088088
## sample estimates:
## ratio of variances
## 0.3222541
var.test(PC21~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC21 by BC_data$Diagnosis
## F = 0.35985, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.2815050 0.4565042
## sample estimates:
## ratio of variances
## 0.3598512
var.test(PC22~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC22 by BC_data$Diagnosis
## F = 0.50418, num df = 356, denom df = 210, p-value = 1.315e-08
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.3944098 0.6395969
## sample estimates:
## ratio of variances
## 0.5041789
var.test(PC23~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC23 by BC_data$Diagnosis
## F = 0.68331, num df = 356, denom df = 210, p-value = 0.001663
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5345431 0.8668448
## sample estimates:
## ratio of variances
## 0.6833129
var.test(PC24~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC24 by BC_data$Diagnosis
## F = 0.56554, num df = 356, denom df = 210, p-value = 2.324e-06
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.4424154 0.7174454
## sample estimates:
## ratio of variances
## 0.5655449
var.test(PC25~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC25 by BC_data$Diagnosis
## F = 0.36848, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.2882585 0.4674560
## sample estimates:
## ratio of variances
## 0.3684843
var.test(PC26~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC26 by BC_data$Diagnosis
## F = 0.43709, num df = 356, denom df = 210, p-value = 6.017e-12
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.3419287 0.5544906
## sample estimates:
## ratio of variances
## 0.4370916
var.test(PC27~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC27 by BC_data$Diagnosis
## F = 0.37912, num df = 356, denom df = 210, p-value = 7.177e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.2965811 0.4809525
## sample estimates:
## ratio of variances
## 0.3791232
var.test(PC28~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC28 by BC_data$Diagnosis
## F = 0.37591, num df = 356, denom df = 210, p-value = 4.008e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.2940709 0.4768817
## sample estimates:
## ratio of variances
## 0.3759143
var.test(PC29~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC29 by BC_data$Diagnosis
## F = 0.40017, num df = 356, denom df = 210, p-value = 2.603e-14
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.3130490 0.5076577
## sample estimates:
## ratio of variances
## 0.4001744
var.test(PC30~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC30 by BC_data$Diagnosis
## F = 0.2101, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1643607 0.2665365
## sample estimates:
## ratio of variances
## 0.2101043
var.test(PC31~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC31 by BC_data$Diagnosis
## F = 0.234, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1830544 0.2968512
## sample estimates:
## ratio of variances
## 0.2340006
var.test(PC32~BC_data$Diagnosis,data=bcatyp_pca)
##
## F test to compare two variances
##
## data: PC32 by BC_data$Diagnosis
## F = 0.2811, num df = 356, denom df = 210, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.2198996 0.3566015
## sample estimates:
## ratio of variances
## 0.2811004
# Better Ways to Visualize
library(factoextra)
library(FactoMineR)
library(ggfortify)
library(psych)
library(corrplot)
library(devtools)
# Correlation
pairs.panels(BC_data[,-1],
gap = 0,
bg = c("red", "blue")[BC_data$Diagnosis],
pch=21)

pairs.panels(bca_pca$x,
gap=0,
bg = c("red", "blue")[BC_data$Diagnosis],
pch=21)

fviz_eig(bca_pca, addlabels = TRUE)

fviz_pca_var(bca_pca,col.var = "cos2",
gradient.cols = c("#FFCC00", "#CC9933", "#660033", "#330033"),
repel = TRUE)

fviz_pca_ind(bca_pca, col.ind = "cos2",
gradient.cols = c("#FFCC00", "#CC9933", "#660033", "#330033"),
repel = TRUE)

biplot(bca_pca)

autoplot(bca_pca,
data = BC_data[,-1],
loadings = TRUE,
labels = BC_data$Diagnosis)

# Different PCA Method.
res.pca <- PCA(BC_data[,-2], graph = FALSE)
print(res.pca)
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 568 individuals, described by 32 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
# Visualize and Interpret PCA using these functions
#get_eigenvalue(res.pca): Extract the eigenvalues/variances of principal components
#fviz_eig(res.pca): Visualize the eigenvalues
#get_pca_ind(res.pca), get_pca_var(res.pca): Extract the results for individuals and variables, respectively.
#fviz_pca_ind(res.pca), fviz_pca_var(res.pca): Visualize the results individuals and variables, respectively.
#fviz_pca_biplot(res.pca): Make a biplot of individuals and variables.
eig.val <- get_eigenvalue(res.pca)
eig.val
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 1.334011e+01 41.687850471 41.68785
## Dim.2 5.740679e+00 17.939623292 59.62747
## Dim.3 2.856671e+00 8.927098271 68.55457
## Dim.4 1.990515e+00 6.220359683 74.77493
## Dim.5 1.655248e+00 5.172648992 79.94758
## Dim.6 1.240621e+00 3.876939831 83.82452
## Dim.7 9.835331e-01 3.073540996 86.89806
## Dim.8 8.581344e-01 2.681670143 89.57973
## Dim.9 6.675166e-01 2.085989522 91.66572
## Dim.10 4.617481e-01 1.442962783 93.10868
## Dim.11 4.028135e-01 1.258792329 94.36748
## Dim.12 3.494252e-01 1.091953839 95.45943
## Dim.13 2.952150e-01 0.922546992 96.38198
## Dim.14 2.606132e-01 0.814416258 97.19639
## Dim.15 2.427493e-01 0.758591484 97.95498
## Dim.16 1.558668e-01 0.487083715 98.44207
## Dim.17 9.419889e-02 0.294371543 98.73644
## Dim.18 7.918881e-02 0.247465027 98.98391
## Dim.19 5.931593e-02 0.185362274 99.16927
## Dim.20 5.203975e-02 0.162624226 99.33189
## Dim.21 4.948143e-02 0.154629475 99.48652
## Dim.22 3.106843e-02 0.097088831 99.58361
## Dim.23 3.011234e-02 0.094101070 99.67771
## Dim.24 2.756705e-02 0.086147045 99.76386
## Dim.25 2.439053e-02 0.076220391 99.84008
## Dim.26 1.816117e-02 0.056753656 99.89683
## Dim.27 1.558456e-02 0.048701748 99.94553
## Dim.28 8.140586e-03 0.025439333 99.97097
## Dim.29 6.816197e-03 0.021300616 99.99227
## Dim.30 1.596090e-03 0.004987780 99.99726
## Dim.31 7.474333e-04 0.002335729 99.99960
## Dim.32 1.288493e-04 0.000402654 100.00000
fviz_eig(res.pca, addlabels = TRUE, ylim = c(0, 50))

var <- get_pca_var(res.pca)
#var$coord: coordinates of variables to create a scatter plot
#var$cos2: represents the quality of representation for variables on the factor map. It’s calculated as the squared coordinates: var.cos2 = var.coord * var.coord.
#var$contrib: contains the contributions (in percentage) of the variables to the principal components.
#The contribution of a variable (var) to a given principal component is (in percentage) : (var.cos2 * 100) / (total cos2 of the component).
var
## Principal Component Analysis Results for variables
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the variables"
## 2 "$cor" "Correlations between variables and dimensions"
## 3 "$cos2" "Cos2 for the variables"
## 4 "$contrib" "contributions of the variables"
# Coordinates
head(var$coord)
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## ID 0.08531238 -0.0805740 0.16204864 0.03292313 0.013155817
## mean_radius 0.79636361 -0.5591083 -0.02209146 -0.06415462 -0.045516453
## mean_texture 0.39530442 -0.1352453 0.11116703 0.83803301 0.030946335
## mean_perimeter 0.82762692 -0.5148999 -0.02344368 -0.06531541 -0.045213275
## mean_area 0.80480447 -0.5524695 0.04177067 -0.07968590 -0.009409548
## mean_smoothness 0.51607144 0.4426651 -0.17509212 -0.21666598 0.481902877
# Cos2: quality on the factore map
head(var$cos2)
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## ID 0.007278202 0.00649217 0.0262597617 0.001083932 1.730755e-04
## mean_radius 0.634195001 0.31260214 0.0004880324 0.004115815 2.071747e-03
## mean_texture 0.156265581 0.01829128 0.0123581088 0.702299323 9.576756e-04
## mean_perimeter 0.684966323 0.26512195 0.0005496060 0.004266103 2.044240e-03
## mean_area 0.647710238 0.30522260 0.0017447886 0.006349843 8.853959e-05
## mean_smoothness 0.266329733 0.19595243 0.0306572498 0.046944147 2.322304e-01
# Contributions to the principal components
head(var$contrib)
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## ID 0.05455878 0.1130906 0.91924333 0.05445487 0.010456171
## mean_radius 4.75404550 5.4453857 0.01708395 0.20677135 0.125162385
## mean_texture 1.17139630 0.3186257 0.43260518 35.28229065 0.057856938
## mean_perimeter 5.13463691 4.6183027 0.01923938 0.21432157 0.123500565
## mean_area 4.85535826 5.3168375 0.06107768 0.31900502 0.005349023
## mean_smoothness 1.99645798 3.4134013 1.07318081 2.35839189 14.029947686
#The plot Below is also known as variable correlation plots. It shows the relationships between all variables. It can be interpreted as follow:
#Positively correlated variables are grouped together.
#Negatively correlated variables are positioned on opposite sides of the plot origin (opposed quadrants).
#The distance between variables and the origin measures the quality of the variables on the factor map.
#Variables that are away from the origin are well represented on the factor map.
# Correlation circle
fviz_pca_var(res.pca, col.var = "black")

# Quality of representation
corrplot(var$cos2, is.corr=FALSE)

# Total cos2 of variables on Dim.1 and Dim.2
#A high cos2 indicates a good representation of the variable on the principal component.
#In this case the variable is positioned close to the circumference of the correlation circle.
#A low cos2 indicates that the variable is not perfectly represented by the PCs.
#In this case the variable is close to the center of the circle.
fviz_cos2(res.pca, choice = "var", axes = 1:2)

fviz_pca_var(res.pca, col.var = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping
)

# Change the transparency by cos2 values
fviz_pca_var(res.pca, alpha.var = "cos2")

corrplot(var$contrib, is.corr=FALSE)

# Contributions of variables to PC1
fviz_contrib(res.pca, choice = "var", axes = 1, top = 10)

# Contributions of variables to PC2
fviz_contrib(res.pca, choice = "var", axes = 2, top = 10)

fviz_pca_var(res.pca, col.var = "contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)

fviz_pca_var(res.pca, alpha.var = "contrib")

fviz_pca_ind(res.pca,
geom.ind = "point", # show points only (but not "text")
col.ind = BC_data$Diagnosis, # color by groups
palette = c("#00AFBB", "#E7B800", "#FC4E07"),
addEllipses = TRUE, # Concentration ellipses
legend.title = "Groups"
)

fviz_pca_var(res.pca, axes.linetype = "blank")

ind.p <- fviz_pca_ind(res.pca, geom = "point", col.ind = BC_data$Diagnosis)
ggpubr::ggpar(ind.p,
title = "Principal Component Analysis",
subtitle = "Breast Cancer data set",
caption = "Source: UCI",
xlab = "PC1", ylab = "PC2",
legend.title = "Diagnosis", legend.position = "top",
ggtheme = theme_gray(), palette = "jco"
)

fviz_pca_biplot(res.pca, repel = TRUE,col.ind = BC_data$Diagnosis,
col.var = "#2E9FDF", # Variables color
)

fviz_pca_biplot(res.pca,
col.ind = BC_data$Diagnosis, palette = "jco",
addEllipses = TRUE, label = "var",
col.var = "black", repel = TRUE,
legend.title = "Diagnosis")

fviz_pca_biplot(res.pca,
# Fill individuals by groups
geom.ind = "point",
pointshape = 21,
pointsize = 2.5,
fill.ind = BC_data$Diagnosis,
col.ind = "black",
# Color variable by groups
legend.title = list(fill = "Diagnosis", color = "Clusters"),
repel = TRUE # Avoid label overplotting
)+
ggpubr::fill_palette("jco")+ # Indiviual fill color
ggpubr::color_palette("npg") # Variable colors

fviz_pca_biplot(res.pca,
# Individuals
geom.ind = "point",
fill.ind = BC_data$Diagnosis, col.ind = "black",
pointshape = 21, pointsize = 2,
palette = "jco",
addEllipses = TRUE,
# Variables
alpha.var ="contrib", col.var = "contrib",
gradient.cols = "RdYlBu",
legend.title = list(fill = "Diagnosis", color = "Contrib",
alpha = "Contrib")
)
